Commit d7d19545 authored by BRONES Romain

Initial commit

* Add two Python scripts
* ArchiveExtractor to communicate with the T/HDBExtractor devices
* SoleilTools contains utility functions to parse mambo or filertrend
  files
#!/usr/Local/pyroot/PyTangoRoot/bin/python
"""
Python module for extracting attributes from an Archive Extractor Device.
Includes a Command Line Interface.
Can be imported as is to use its functions in a user script.
"""
import logging
import datetime
import numpy as np
import PyTango as tango
# Name the logger after the filename
logger = logging.getLogger("ArchiveExtractor")
##########################################################################
""" Commodity variables """
# Extractor date format for GetAttDataBetweenDates
DBDFMT = "%Y-%m-%d %H:%M:%S"
# Extractor date format for GetNearestValue
DBDFMT2 = "%d-%m-%Y %H:%M:%S"
##---------------------------------------------------------------------------##
def dateparse(datestr):
    """
    Convenience function to parse date strings.
    The full format is %Y-%m-%d-%H:%M:%S and it can be shortened to be less precise.

    Parameters
    ----------
    datestr : string
        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.

    Exceptions
    ----------
    ValueError
        If the parsing failed.

    Returns
    -------
    date : datetime.datetime
        Parsed date.
    """
    logger.info("Parse date '%s'"%datestr)

    fmt = [
        "%Y-%m-%d-%H:%M:%S",
        "%Y-%m-%d-%H:%M",
        "%Y-%m-%d-%H",
        "%Y-%m-%d",
        "%Y-%m",
        ]

    date = None
    for f in fmt:
        logger.debug("Try format '%s'"%f)
        try:
            date = datetime.datetime.strptime(datestr, f)
            # Stop at the first format that matches
            break
        except ValueError:
            logger.debug("Parsing failed")

    if date is None:
        logger.error("Could not parse date")
        raise ValueError("Could not parse date '%s'"%datestr)

    return date
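
# Usage sketch for dateparse (the dates below are illustrative):
#
#   dateparse("2021-02-15-09:30:00")   # -> datetime.datetime(2021, 2, 15, 9, 30)
#   dateparse("2021-02")               # -> datetime.datetime(2021, 2, 1, 0, 0)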
##---------------------------------------------------------------------------##
def query_ADB_BetweenDates(attr,
                           dateStart,
                           dateStop=None,
                           extractor="archiving/TDBExtractor/4"):
    """
    Query attribute data from an archiver database, get all points between dates.
    Use GetAttDataBetweenDates.
    Warning: if the time interval yields a huge set of data, the extraction can stall.

    Parameters
    ----------
    attr : String
        Name of the attribute. Full Tango name, e.g. "test/dg/panda/current".
    dateStart : datetime.datetime
        Start date for extraction.
    dateStop : datetime.datetime
        Stop date for extraction.
        Default is now (datetime.datetime.now()).
    extractor : String
        Name of the DB Extractor device.
        Default is "archiving/TDBExtractor/4".

    Exceptions
    ----------
    ValueError
        The attribute is not found in the database.

    Returns
    -------
    [date, value] : array
        date : numpy.ndarray of datetime.datetime objects
            Dates of the values.
        value : numpy.ndarray
            Archived values.
    """
    # Default stop date: now (evaluated at call time, not at import time)
    if dateStop is None:
        dateStop = datetime.datetime.now()

    # Max number of points per extraction chunk
    Nmax = 100000

    # Device Proxy to DB
    logger.debug("Instantiate proxy to %s"%extractor)
    ADB = tango.DeviceProxy(extractor)

    # Give the DB extractor a 3 second timeout
    ADB.set_timeout_millis(3000)

    # Check that the attribute is in the database
    logger.debug("Check that %s is archived."%attr)
    if not ADB.IsArchived(attr):
        logger.error("Attribute '%s' is not archived in DB %s"%(attr, extractor))
        raise ValueError("Attribute '%s' is not archived in DB %s"%(attr, extractor))

    # Get its sampling period in seconds
    samplingPeriod = int(ADB.GetArchivingMode(attr)[1])*10**-3
    logger.debug("Attribute is sampled every %g seconds"%samplingPeriod)

    # Evaluate the number of points
    est_N = (dateStop-dateStart).total_seconds()/samplingPeriod
    logger.debug("Which leads to %d points to extract."%est_N)

    # If the data set is too big, cut it into chunks
    if est_N > Nmax:
        dt = datetime.timedelta(seconds=samplingPeriod)*Nmax
        cdates = [dateStart]
        while cdates[-1] < dateStop:
            cdates.append(cdates[-1]+dt)
        cdates[-1] = dateStop
        logger.debug("Cutting access into %d small chunks of time, %s each."%(len(cdates)-1, dt))
    else:
        cdates = [dateStart, dateStop]

    # Arrays to hold every chunk
    value = []
    date = []

    # For each date chunk
    for i_d in range(len(cdates)-1):
        # Make retrieval request
        logger.debug("Perform GetAttDataBetweenDates (%s, %s, %s)"%(
            attr,
            cdates[i_d].strftime(DBDFMT),
            cdates[i_d+1].strftime(DBDFMT))
            )
        [N,], [name,] = ADB.GetAttDataBetweenDates([
            attr,
            cdates[i_d].strftime(DBDFMT),
            cdates[i_d+1].strftime(DBDFMT)
            ])

        # Read the history
        logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
        attrHist = ADB.attribute_history(name, N)

        # Transform to datetime - value arrays
        _value = np.empty(N, dtype=float)
        _date = np.empty(N, dtype=object)
        for i_h in range(N):
            _value[i_h] = attrHist[i_h].value
            _date[i_h] = attrHist[i_h].time.todatetime()

        # Remove dynamic attribute
        logger.debug("Remove dynamic attribute %s."%name)
        ADB.RemoveDynamicAttribute(name)

        value.append(_value)
        date.append(_date)

    logger.debug("Concatenate chunks")
    value = np.concatenate(value)
    date = np.concatenate(date)

    logger.debug("Extraction done for %s."%attr)
    return [date, value]
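
# Usage sketch (assumes the module file is named ArchiveExtractor.py, that the
# extractor device is reachable, and that the attribute below is archived; all
# names are illustrative):
#
#   import ArchiveExtractor as ae
#   date, value = ae.query_ADB_BetweenDates(
#       "test/dg/panda/current",
#       ae.dateparse("2021-02-15"),
#       ae.dateparse("2021-02-16"),
#       extractor="archiving/TDBExtractor/4")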
##---------------------------------------------------------------------------##
def query_ADB_NearestValue(attr,
                           dates,
                           extractor="archiving/TDBExtractor/4"):
    """
    Query attribute data from an archiver database, get the nearest points from dates.
    Use GetNearestValue and perform multiple calls.
    For each date in dates, read the closest sampled value.
    Return the real dates of the samples.

    Parameters
    ----------
    attr : String
        Name of the attribute. Full Tango name, e.g. "test/dg/panda/current".
    dates : numpy.ndarray of datetime.datetime
        Dates for extraction.
    extractor : String
        Name of the DB Extractor device.
        Default is "archiving/TDBExtractor/4".

    Exceptions
    ----------
    ValueError
        The attribute is not found in the database.

    Returns
    -------
    [realdate, value] : array
        realdate : numpy.ndarray of datetime.datetime objects
            Dates of the values.
        value : numpy.ndarray
            Archived values.
    """
    # Device Proxy to DB
    ADB = tango.DeviceProxy(extractor)

    # Give the DB extractor a 3 second timeout
    ADB.set_timeout_millis(3000)

    # Check that the attribute is in the database
    if not ADB.IsArchived(attr):
        raise ValueError("Attribute '%s' is not archived in DB %s"%(attr, extractor))

    # Prepare arrays
    value = np.empty(len(dates), dtype=float)
    realdate = np.empty(len(dates), dtype=object)

    # Loop on dates
    for i in range(len(dates)):
        # Make retrieval; the answer is "<timestamp in ms>;<value>"
        answ = ADB.GetNearestValue([attr, dates[i].strftime(DBDFMT2)])
        answ = answ.split(";")

        realdate[i] = datetime.datetime.fromtimestamp(int(answ[0])/1000)
        value[i] = answ[1]

    return [realdate, value]
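
# Usage sketch (same assumptions as above; one point per hour over a day):
#
#   import numpy as np
#   import ArchiveExtractor as ae
#   dates = np.array([ae.dateparse("2021-02-15-%02d"%h) for h in range(24)])
#   realdate, value = ae.query_ADB_NearestValue("test/dg/panda/current", dates)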
##########################################################################
""" Command Line Interface """
if __name__ == "__main__":
# Default stop date
dateStop = datetime.datetime.now()
# Default stop date
dateStart = datetime.datetime.now()-datetime.timedelta(days=1)
#######################################################
# Install argument parser
import argparse
parser = argparse.ArgumentParser(description="Extract attributes from the extractor devices.")
parser.add_argument("--from", type=dateparse, dest="dateStart",
help="Start date for extraction, format '1990-12-13-22:33:45'. "+
"It is possible to be less precise and drop, seconds, minutes, hours or even day."+
" Default is one day ago",
default=dateStart)
parser.add_argument("--to", type=dateparse, dest="dateStop",
help="Stop date for extraction, format '1990-12-13-22:33:45'. It is possible to be less precise and drop, seconds, minutes, hours or even day."+
" Default is now.",
default=dateStop)
parser.add_argument("--DB", choices=["H", "T"],
default="T", help="Database to extract from. HDB (H) or TDB (T), default: %(default)s")
parser.add_argument("--DBN", type=int, default=2,
help="Extractor device number, default: %(default)s")
parser.add_argument("--fileout", type=str, default="extracted_%s.npy"%datetime.datetime.now().strftime("%Y%m%d_%H%M%S"),
help="filename of the extraction destination. Default: %(default)s"),
parser.add_argument('--log', type=str, default="INFO",
help="Log level. Default: %(default)s.")
parser.add_argument('attributes', type=str, nargs='+',
help="List of attributes to extract. Full tango path.")
args = parser.parse_args()
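
    # Example invocation (a hedged sketch; the attribute path is illustrative
    # and the extractor device number depends on the local archiving setup):
    #
    #   python ArchiveExtractor.py --from 2021-02-15 --to 2021-02-16 \
    #       --DB T --DBN 4 test/dg/panda/current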
    #######################################################
    # Configure logger

    # Add a stream handler
    s_handler = logging.StreamHandler()
    s_handler.setFormatter(logging.Formatter("%(levelname)s\t[%(funcName)s] \t%(message)s"))

    # Set level according to command line argument
    s_handler.setLevel(level=getattr(logging, args.log.upper()))
    logger.setLevel(level=getattr(logging, args.log.upper()))
    logger.addHandler(s_handler)

    logger.debug("Parsed arguments: %s"%args)

    #######################################################
    # Select Extractor
    extractor = "archiving/%sDBExtractor/%d"%(args.DB, args.DBN)

    #######################################################
    # Prepare dictionary for results
    results = dict()

    #######################################################
    # Extract from database
    logger.info("Extract from %s to %s."%(args.dateStart, args.dateStop))

    for attr in args.attributes:
        logger.info("Extracting attribute %s..."%attr)

        try:
            datevalue = query_ADB_BetweenDates(attr, args.dateStart, args.dateStop, extractor)

            # Add to result dictionary
            results[attr] = datevalue
        except ValueError:
            logger.warning("Failed to extract %s. Skipping..."%attr)
        except tango.CommunicationFailed:
            logger.warning("Failed to extract %s. Skipping..."%attr)
            logger.error("The device %s might have crashed.\n"%extractor+
                "You should check with Jive and probably restart it with Astor.\n")

        # Save all at each step
        np.save(args.fileout, results)
# -*- coding: utf-8 -*-
"""
Tools for Soleil Synchrotron
@author: broucquart
"""
import numpy as np
import logging
import datetime
import matplotlib.colors as mcol
import pickle
import matplotlib
logger = logging.getLogger(__name__)
###############################################################################
# VECTORIZED DATE FUNCTIONS
###############################################################################
ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)
ArrayDatetimeToTimeStamp = np.vectorize(datetime.datetime.timestamp)
ArrayStrpToDateTime = np.vectorize(lambda x : datetime.datetime.strptime(x, "%Y/%m/%d %H:%M:%S.%f"))
###############################################################################
# DATA IMPORTATION
###############################################################################
##---------------------------------------------------------------------------##
def load_filer_trend(filename, delimiter='\t'):
    """
    Load data from a file generated by atkfilertrend.

    Parameters
    ----------
    filename : String
        Path to the file to load.
    delimiter : String, optional
        Column delimiter used in the file. Default is a tab character.

    Returns
    -------
    ddata : dict
        Dictionary of data. Key is the attribute tango path, data is the numpy
        array of data.
        The special key "Time" holds the timestamps.
    """
    # Load the file data
    logger.info("Load file %s"%filename)
    data = np.genfromtxt(filename, skip_header=1, skip_footer=1, delimiter=delimiter).transpose()
    logger.debug("data shape : %s"%str(data.shape))

    # Read the first line and parse attribute names
    with open(filename, 'r') as fp:
        head = fp.readline()

    # Split head
    logger.debug("read head : %s"%head)
    head = head.split(delimiter)
    logger.debug("parsed head : %s"%str(head))

    # Create the dictionary.
    # Convert milliseconds to seconds, then timestamps to datetime.
    ddata = {"Time":ArrayTimeStampToDatetime(data[0]/1000)}

    # Attach data to keys in the dict
    for i in range(1, len(head)-1):
        ddata[head[i]] = data[i]

    return ddata
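
# Usage sketch (the file path is illustrative):
#
#   ddata = load_filer_trend("mytrend.txt")
#   for attr in ddata:
#       if attr != "Time":
#           print(attr, ddata[attr].mean())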
##---------------------------------------------------------------------------##
def load_mambo_file(filename):
    """
    Load data from a file extracted from Mambo.

    Parameters
    ----------
    filename : string
        Filepath.

    Returns
    -------
    ddata : dict
        Dictionary of data. Key is the attribute tango path, data is a tuple of
        two numpy arrays. First array is datetime values, second is attribute
        value.
    """
    # Load the file data as strings
    logger.info("Load file %s"%filename)
    data = np.genfromtxt(filename, delimiter='\t', skip_header=1, dtype=str).transpose()
    logger.debug("data shape : %s"%str(data.shape))

    # Read the first line and parse attribute names
    with open(filename, 'r') as fp:
        head = fp.readline()

    # Split head, remove last char (newline)
    logger.debug("read head : %s"%head)
    head = head[:-1].split('\t')
    logger.debug("parsed head : %s"%str(head))

    # Convert strings to datetime
    tdata = ArrayStrpToDateTime(data[0])

    ddata = dict()

    # Find correct values for each dataset (ignore "*").
    # Add to the dictionary: key is the attribute tango path, value is a tuple
    # of time array and value array.
    for n in range(1, len(data)):
        good = np.where(data[n] != "*")[0]
        ddata[head[n]] = (tdata[good], data[n][good].astype(float))

    return ddata
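
# Usage sketch (file path and attribute name are illustrative):
#
#   ddata = load_mambo_file("extraction.txt")
#   t, v = ddata["test/dg/panda/current"]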
###############################################################################
# SIGNAL PROCESSING
###############################################################################
##---------------------------------------------------------------------------##
def MM(datax, datay, N, DEC=1):
    """
    Mobile mean along x. Averaging window of N points.

    Parameters
    ----------
    datax : numpy.ndarray
        X axis, will only be cut at the edges to match the length of the mean Y.
        Set to "None" if there is no X axis.
    datay : numpy.ndarray
        Y axis, will be averaged.
    N : int
        Averaging window length in points.
    DEC : int, optional
        Decimation factor applied to the output. Default is 1 (no decimation).

    Returns
    -------
    Tuple of numpy.ndarray
        (X axis, Y axis) averaged data.
    """
    # Length of the 'valid' convolution output; the X axis is trimmed to the
    # same length (centered on the window) so both vectors stay aligned.
    L = len(datay)-N+1

    if datax is None:
        return (np.arange(N//2, N//2+L)[::DEC],
                np.convolve(datay, np.ones(N)/N, mode='valid')[::DEC])

    return (np.asarray(datax[N//2:N//2+L])[::DEC],
            np.convolve(datay, np.ones(N)/N, mode='valid')[::DEC])
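
# Usage sketch: smooth a noisy sine with a 50-point window and keep every 10th
# point (all values are illustrative):
#
#   x = np.linspace(0, 10, 1000)
#   y = np.sin(x) + np.random.normal(0, 0.1, len(x))
#   xm, ym = MM(x, y, 50, DEC=10)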
##---------------------------------------------------------------------------##
def meanstdmaxmin(x, y, N):
    """
    Compute mean, max, min and +- std over blocks of N points on the Y axis.
    Return arrays of length len(x)//N points.

    Parameters
    ----------
    x : numpy.ndarray
        X vector, i.e. sampling times.
    y : numpy.ndarray
        Y vector, i.e. values.
    N : int
        Number of points per block.

    Returns
    -------
    xmean : numpy.ndarray
        New x vector.
    ymean : numpy.ndarray
        Means of Y.
    ystd : numpy.ndarray
        Stds of Y.
    ymax : numpy.ndarray
        Maxes of Y.
    ymin : numpy.ndarray
        Mins of Y.
    """
    # If the x vector holds datetimes, convert to timestamps
    if isinstance(x[0], datetime.datetime):
        xIsDatetime = True
        x = ArrayDatetimeToTimeStamp(x)
    else:
        xIsDatetime = False

    # Quick verification of the X data vector jitter
    period = np.mean(x[1:]-x[:-1])
    jitter = np.std(x[1:]-x[:-1])
    if jitter > 0.01*period:
        logger.warning("On X data vector : sampling jitter is over 1%% of the period. (j=%.3g, p=%.3g)"%(jitter, period))

    # Get the number of blocks of N points
    _L = len(y)//N

    # Reshape the arrays.
    # Drop the last points that do not fill a block of N points.
    _x = np.reshape(x[:_L*N], (_L, N))
    _y = np.reshape(y[:_L*N], (_L, N))

    # Compute the new x vector.
    # Use the mean to compute the new abscissa positions.
    xmean = np.mean(_x, axis=1)
    if xIsDatetime:
        xmean = ArrayTimeStampToDatetime(xmean)

    # Compute the statistics
    ymean = np.mean(_y, axis=1)
    ystd = np.std(_y, axis=1)
    ymin = np.min(_y, axis=1)
    ymax = np.max(_y, axis=1)

    return (xmean, ymean, ystd, ymax, ymin)
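
# Usage sketch: reduce 10000 samples to 100 blocks of 100 points
# (illustrative data):
#
#   x = np.arange(10000)
#   y = np.random.normal(0, 1, 10000)
#   xm, ym, ys, ymax, ymin = meanstdmaxmin(x, y, 100)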
###############################################################################
## PLOTTING
###############################################################################
##---------------------------------------------------------------------------##
def plot_meanstdmaxmin(ax, datax, datay, N,
        c=None, label=None):
    """
    Plot on an axis the representation in mean, +- std and min/max.

    Parameters
    ----------
    ax : matplotlib.axes._base._AxesBase
        Axis on which to plot.
    datax : numpy.ndarray
        X axis.
    datay : numpy.ndarray
        Y axis.
    N : int
        Target number of points to plot; the averaging block size is derived
        from it.
    c : color, optional
        Color. The default is None.
    label : str, optional
        Label. The default is None.

    Returns
    -------
    lines : list
        List of the matplotlib artists that were created.
    """
    # For the first plot, consider the whole data range.
    # Compute the averaging ratio. Minimum ratio is 1.
    ratio = max(len(datax)//N, 1)

    # Compute the new data
    xmean, ymean, ystd, ymax, ymin = meanstdmaxmin(datax, datay, ratio)

    lines = []

    # First, plot the mean with the given attributes
    lines.append(ax.plot(xmean, ymean, color=c, label=label)[0])

    # Retrieve the color, useful if c was None
    c = lines[0].get_color()

    # Add max, min and std area
    lines.append(ax.plot(xmean, ymax, linestyle='-', color=mcol.to_rgba(c, 0.4))[0])
    lines.append(ax.plot(xmean, ymin, linestyle='-', color=mcol.to_rgba(c, 0.4))[0])
    lines.append(ax.fill_between(xmean, ymean-ystd, ymean+ystd, color=mcol.to_rgba(c, 0.4)))

    return lines
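
# Usage sketch (assumes matplotlib.pyplot is available; data are illustrative):
#
#   import matplotlib.pyplot as plt
#   fig, ax = plt.subplots()
#   x = np.arange(100000)
#   y = np.random.normal(0, 1, 100000)
#   plot_meanstdmaxmin(ax, x, y, 500, label="noise")
#   ax.legend()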
##---------------------------------------------------------------------------##
def plot_MM(ax, datax, datay, N, DEC=1,
        c=None, label=None):
    """
    Plot a signal with its mobile mean. The raw signal is plotted with transparency.

    Parameters
    ----------
    ax : matplotlib.axes._base._AxesBase
        Axis on which to plot.
    datax : numpy.ndarray, None
        X axis data.
    datay : numpy.ndarray
        Y axis data.
    N : int
        Averaging window length in points.
    DEC : int, optional
        Decimation factor for the mobile mean. Default is 1.
    c : color, optional
        Line color. The default is None.
    label : str, optional
        Line label. The default is None.

    Returns
    -------
    lines : list
        List of the matplotlib artists that were created.
    """
    # To collect lines
    lines = []

    # Plot the mobile mean
    _l = ax.plot(*MM(datax, datay, N, DEC), c=c, label=label)[0]
    lines.append(_l)

    # Retrieve the color, useful if c was None
    c = lines[0].get_color()

    # Plot the entire signal
    if datax is None:
        # Case of no X axis data
        _l = ax.plot(datay, c=mcol.to_rgba(c, 0.4))[0]
    else:
        _l = ax.plot(datax, datay, c=mcol.to_rgba(c, 0.4))[0]
    lines.append(_l)

    return lines
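
# Usage sketch (illustrative data; ax as in the previous example):
#
#   y = np.cumsum(np.random.normal(0, 1, 5000))
#   plot_MM(ax, None, y, 100, DEC=10, label="random walk")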
###############################################################################
## PLOT MANIPULATION
###############################################################################
##---------------------------------------------------------------------------##
def get_current_ax_zoom(ax):
    """
    Get the current axis zoom setup and print the python commands to set it exactly.

    Parameters
    ----------
    ax : numpy.ndarray
        Array of axes.

    Raises
    ------
    NotImplementedError
        When the type is not implemented. It is time to implement it!

    Returns
    -------
    None.
    """
    if isinstance(ax, np.ndarray):
        for i in range(len(ax)):
            print("ax[%d].set_xlim"%i+str(ax[i].get_xlim()))
            print("ax[%d].set_ylim"%i+str(ax[i].get_ylim()))
        return

    raise NotImplementedError("Type is %s"%type(ax))
###############################################################################
## DATE PROCESSING
###############################################################################
##---------------------------------------------------------------------------##
def get_time_region(t, startDate, endDate):
    """
    Return a range of indices selecting the points between the start and stop dates.

    Parameters
    ----------
    t : numpy.ndarray
        An array of datetime objects, assumed sorted in ascending order.
    startDate : datetime.datetime
        Start date.
    endDate : datetime.datetime
        Stop date.

    Returns
    -------
    zone : numpy.ndarray
        A numpy arange between both indices.
    """
    iT1 = np.searchsorted(t, startDate)
    iT2 = np.searchsorted(t, endDate)

    zone = np.arange(iT1, iT2)
    if len(zone) == 0:
        logger.warning("Time zone is empty.")

    return zone
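
# Usage sketch, e.g. combined with load_mambo_file (names are illustrative):
#
#   t, v = ddata["test/dg/panda/current"]
#   zone = get_time_region(t, datetime.datetime(2021, 2, 15),
#                          datetime.datetime(2021, 2, 16))
#   # t[zone], v[zone] now cover only the selected day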
###############################################################################
# DATA EXPORTATION
###############################################################################
##---------------------------------------------------------------------------##
def export_mpl(fig, filename):
    """
    Export a figure to a .mpl file (pickled matplotlib figure).

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to export.
    filename : str
        Filename, without extension.

    Returns
    -------
    None.
    """
    if not isinstance(fig, matplotlib.figure.Figure):
        raise TypeError("Parameter fig should be a matplotlib figure (type matplotlib.figure.Figure).")

    with open(filename+".mpl", 'wb') as fp:
        pickle.dump(fig, fp)
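
# Usage sketch: a figure exported this way can be restored later with pickle
# (the filename is illustrative):
#
#   export_mpl(fig, "trend_overview")
#   with open("trend_overview.mpl", 'rb') as fp:
#       fig2 = pickle.load(fp)
#   fig2.show()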