# Newer
# Older
"""
Python module for extracting attributes from the Archive Extractor Device.
"""
import logging
import datetime
import numpy as np
import PyTango as tango
__version__ = "1.0.1"
##########################################################################
### Install logger for the module ###
##########################################################################
# Module-level logger; attach a basic console handler only when none is
# configured yet, so importing the module twice does not duplicate output.
logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    # No handler yet: create a simple stream handler mirroring the logger level.
    _handler = logging.StreamHandler()
    _handler.setLevel(logger.level)
    _handler.setFormatter(logging.Formatter("%(levelname)s:%(message)s"))
    logger.addHandler(_handler)
##########################################################################
### Commodity private variables ###
##########################################################################
# Date format used by ExtractBetweenDates commands (year first).
_DBDFMT = "%Y-%m-%d %H:%M:%S"
# Date format used by the GetAttData*BetweenDates* commands (day first).
_DBDFMT2 = "%d-%m-%Y %H:%M:%S"
##########################################################################
### Commodity private functions ###
##########################################################################
# Element-wise conversion of an array of POSIX timestamps (seconds) to
# datetime.datetime objects.
_ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def _check_initialized():
    """
    Check if the module is initialized, raise exception RuntimeError if not.

    Raises
    ------
    RuntimeError
        If init() has not been run yet (an extractor proxy is still None).
    """
    # _extractors holds (None, None) until init() replaces both entries.
    # BUGFIX: message typo "initialied" corrected; 'global' removed (read-only access).
    if None in _extractors:
        logger.error("Module {0} is not initialized. You should run {0}.init().".format(__name__))
        raise RuntimeError("Module not initialized")
def _dateparse(datestr):
    """
    Convenient function to parse date strings.
    Global format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.

    Parameters
    ---------
    datestr : string
        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.

    Exceptions
    ----------
    ValueError
        If the parsing failed.

    Returns
    -------
    date : datetime.datetime
        Parsed date
    """
    # Candidate formats, most precise first; the first successful parse wins.
    candidates = (
        "%Y-%m-%d-%H:%M:%S",
        "%Y-%m-%d-%H:%M",
        "%Y-%m-%d-%H",
        "%Y-%m-%d",
        "%Y-%m",
    )
    for candidate in candidates:
        try:
            return datetime.datetime.strptime(datestr, candidate)
        except ValueError:
            pass
    raise ValueError("Could not parse argument to a date")
##########################################################################
### Module private variables ###
##########################################################################
# Tuple of extractor device proxies for HDB and TDB; None until init() is run.
_extractors = (None, None)
# Tuple of attribute name tables, one per extractor (HDB, TDB).
_AttrTables = (None, None)
##########################################################################
### Module initialisation functions ###
##########################################################################
def init(
        HdbExtractorPath="archiving/hdbextractor/2",
        TdbExtractorPath="archiving/tdbextractor/2",
        ):
    """
    Initialize the module.
    Instanciate tango.DeviceProxy for extractors (TDB and HDB) and fill the
    attribute lookup tables.

    Parameters
    ----------
    HdbExtractorPath, TdbExtractorPath : string
        Tango path to the extractors.
    """
    global _extractors
    # BUGFIX: _AttrTables was assigned as a function local, so the module-level
    # lookup table stayed (None, None) after init().
    global _AttrTables
    logger.debug("Instanciating extractors device proxy...")
    _extractors = (tango.DeviceProxy(HdbExtractorPath), tango.DeviceProxy(TdbExtractorPath))
    logger.debug("{} and {} instanciated.".format(*_extractors))
    logger.debug("Configuring extractors device proxy...")
    for e in _extractors:
        # set timeout to 3 sec: extraction requests can be slow
        e.set_timeout_millis(3000)
    logger.debug("Filling attributes lookup tables...")
    _AttrTables = tuple(e.getattnameall() for e in _extractors)
    logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))
##########################################################################
def __init__(
        self,
        extractorKind='H', extractorNumber=2,
        extractorPath=None,
        logger='info',
        ):
    """
    Constructor function

    Parameters
    ----------
    extractorKind: char
        Either 'H' or 'T' for HDB or TDB.
    extractorNumber: int
        Number of the archive extractor instance to use.
    extractorPath: string
        Tango path to the extractor.
        If this argument is given, it takes precedence over extractorKind and extractorNumber.
    logger: logging.Logger, str
        Logger object to use.
        If string, can be a log level. A basic logger with stream handler will be instanciated.
        Default to 'info'.

    Return
    ------
    ArchiveExtractor
    """
    #######################################################
    # BUGFIX: 'logger' was documented but missing from the signature, so the
    # module-level logger was silently picked up and logger.upper() crashed.
    if isinstance(logger, logging.Logger):
        self.logger = logger
    else:
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(getattr(logging, logger.upper()))
        if not self.logger.hasHandlers():
            # No handlers, create one
            sh = logging.StreamHandler()
            sh.setLevel(self.logger.level)
            sh.setFormatter(logging.Formatter("%(levelname)s:%(message)s"))
            self.logger.addHandler(sh)
    #######################################################
    # Select Extractor
    if extractorPath is None:
        # BUGFIX: the default path was built but never assigned to extractorPath.
        extractorPath = "archiving/%sDBExtractor/%d"%(extractorKind, extractorNumber)
    self.extractor = tango.DeviceProxy(extractorPath)
    # set timeout to 3 sec: extraction requests can be slow
    self.extractor.set_timeout_millis(3000)
    self.logger.debug("Archive Extractor %s used."%self.extractor.name())
    # Get the attribute table
    self.attr_table = self.extractor.getattnameall()
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
##---------------------------------------------------------------------------##
@staticmethod
def dateparse(datestr):
    """
    Convenient function to parse date strings.
    Global format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.

    Parameters
    ---------
    datestr : string
        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.

    Exceptions
    ----------
    ValueError
        If the parsing failed.

    Returns
    -------
    date : datetime.datetime
        Parsed date
    """
    # This gives all format that will be tried, in order.
    # Stop on first parse success. Raise error if none succeed.
    fmt = [
        "%Y-%m-%d-%H:%M:%S",
        "%Y-%m-%d-%H:%M",
        "%Y-%m-%d-%H",
        "%Y-%m-%d",
        "%Y-%m",
    ]
    date = None
    for f in fmt:
        try:
            date = datetime.datetime.strptime(datestr, f)
        except ValueError:
            continue
        else:
            break
    else:
        raise ValueError("Could not parse argument to a date")
    # BUGFIX: the parsed date was never returned (the method returned None
    # on success); mirror the module-level _dateparse behaviour.
    return date
##---------------------------------------------------------------------------##
):
"""
Query attribute data from an archiver database, get all points between dates.
Use ExtractBetweenDates.
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
dateStart : datetime.datetime, string
Start date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
dateStop : datetime.datetime, string, None
Stop date for extraction.
If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
If None, it takes the current date and time.
Default is None (now).
Exceptions
----------
ValueError
The attribute is not found in the database.
Returns
-------
[date, value] : array
date : numpy.ndarray of datetime.datime objects
Dates of the values
value : numpy.ndarray
Archived values
"""
# Parse date if it is string
if type(dateStart) is str:
dateStart = self.dateparse(dateStart)
if dateStop is None:
dateStop = datetime.datetime.now()
if type(dateStop) is str:
dateStop = self.dateparse(dateStop)
# Uncapitalize attribute
attribute = attribute.lower()
# Check attribute is in database
self._check_attribute(attribute)
# Get info about the attribute
info=self.infoattr(attribute)
self.logger.debug("Attribute information \n%s"%info)
# Detect spectrum
attrtype="scalar"
if int(info["max_dim_x"]) > 1:
if int(info["max_dim_y"]) > 0:
self.logger.warning("Attribute %s is a (%s; %s) vector. This is poorly handled by this script."%(
attribute, info["max_dim_x"], info["max_dim_y"]))
attrtype="multi"
else:
self.logger.info("Attribute %s is a 1D vector, dimension = %s."%(
attribute, info["max_dim_x"]))
attrtype="vector"
# Cut the time horizon in chunks
cdates = self.chunkerize(attribute, dateStart, dateStop)
# Arrays to hold every chunks
value = []
date = []
# For each date chunk
for i_d in range(len(cdates)-1):
# =============
# For now we handle multi dimension the same way as scalar, which will get only the first element
if (attrtype=="scalar") or (attrtype=="multi"):
# Inform on retrieval request
self.logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%(
attribute,
cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT))
)
cmdreturn = self._cmd_with_retry("ExtractBetweenDates", [
attribute,
cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT)
])
# Check command return
if cmdreturn is None:
logger.error("Could not extract this chunk. Check the device extractor")
return None
# Unpack return
_date, _value = cmdreturn
# Transform to datetime - value arrays
# NOTE: it is faster than using pandas.to_datetime()
_value = np.asarray(_value, dtype=float)
if len(_date) > 0:
_date = _ArrayTimeStampToDatetime(_date/1000.0)
value.append(_value)
date.append(_date)
# =============
if attrtype=="vector":
self.logger.info("Perform GetAttDataBetweenDates (%s, %s, %s)"%(
attribute,
cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT)
))
[N,], [name,] = self.extractor.GetAttDataBetweenDates([
attribute,
cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT)
# Read the history
self.logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
attrHist = self.extractor.attribute_history(name, N)
# Transform to datetime - value arrays
_value = np.empty((N, int(info["max_dim_x"])), dtype=float)
_value[:] = np.nan
_date = np.empty(N, dtype=object)
for i_h in range(N):
_value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
_date[i_h]=attrHist[i_h].time.todatetime()
# Remove dynamic attribute
self.logger.debug("Remove dynamic attribute %s."%name)
self.extractor.RemoveDynamicAttribute(name)
value.append(_value)
date.append(_date)
self.logger.debug("Concatenate chunks")
value = np.concatenate(value)
date = np.concatenate(date)
self.logger.debug("Extraction done for %s."%attribute)
if attrtype=="vector":
return pd.DataFrame(index=date, data=value).dropna(axis=1, how='all')
else:
return pd.Series(index=date, data=value)
##---------------------------------------------------------------------------##
def betweenDates_MinMaxMean(
self,
attribute,
dateStart,
dateStop=datetime.datetime.now(),
Query attribute data from an archiver database, get all points between dates.
Use ExtractBetweenDates.
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
dateStart : datetime.datetime, string
Start date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
dateStop : datetime.datetime, string
Stop date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
Default is now (datetime.datetime.now())
timeInterval: datetime.timedelta, string
Time interval used to perform min,max and mean.
Can be a string with a number and a unit in "d", "h", "m" or "s"
Exceptions
----------
ValueError
The attribute is not found in the database.
Returns
-------
[mdates, value_min, value_max, value_mean] : array
mdates : numpy.ndarray of datetime.datime objects
Dates of the values, middle of timeInterval windows
value_min : numpy.ndarray
Minimum of the value on the interval
value_max : numpy.ndarray
Maximum of the value on the interval
value_mean : numpy.ndarray
Mean of the value on the interval
# Parse date if it is string
if type(dateStart) is str:
dateStart = self.dateparse(dateStart)
if type(dateStop) is str:
dateStop = self.dateparse(dateStop)
# Parse timeInterval if string
if type(timeInterval) is str:
try:
mul = {'s':1, 'm':60, 'h':60*60, 'd':60*60*24}[timeInterval[-1]]
except KeyError:
self.logger.error("timeInterval could not be parsed")
raise ValueError("timeInterval could not be parsed")
timeInterval= datetime.timedelta(seconds=int(timeInterval[:-1])*mul)
# Check that the attribute is in the database
self.logger.debug("Check that %s is archived."%attribute)
if not self.extractor.IsArchived(attribute):
self.logger.error("Attribute '%s' is not archived in DB %s"%(attribute, extractor))
raise ValueError("Attribute '%s' is not archived in DB %s"%(attribute, extractor))
# Get info about the attribute
info=self.infoattr(attribute)
self.logger.debug("Attribute information \n%s"%info)
# Detect spectrum
attrtype="scalar"
if int(info["max_dim_x"]) > 1:
self.logger.error("Attribute is not a scalar. Cannot perform this kind of operation.")
return None
# Cut data range in time chunks
cdates = [dateStart]
while cdates[-1] < dateStop:
cdates.append(cdates[-1]+timeInterval)
mdates = np.asarray(cdates[:-1])+timeInterval/2
self.logger.debug("Cutting time range to %d chunks of time, %s each."%(len(cdates)-1, timeInterval))
value_min = np.empty(len(cdates)-1)
value_max = np.empty(len(cdates)-1)
value_mean = np.empty(len(cdates)-1)
# For each time chunk
for i_d in range(len(cdates)-1):
for func, arr in zip(
["Max", "Min", "Avg"],
[value_max, value_min, value_mean],
):
# Make requests
self.logger.debug("Perform GetAttData%sBetweenDates (%s, %s, %s)"%(
func,
attribute,
cdates[i_d].strftime(_DBDFMT2),
cdates[i_d+1].strftime(_DBDFMT2))
_val =getattr(self.extractor, "GetAttData%sBetweenDates"%func)([
attribute,
cdates[i_d].strftime(_DBDFMT2),
cdates[i_d+1].strftime(_DBDFMT2)
self.logger.debug("Extraction done for %s."%attribute)
return [mdates, value_min, value_max, value_mean]
def _check_attribute(self, attribute):
    """
    Check that the attribute is in the database

    Parameters
    ----------
    attribute : String
        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".

    Raises
    ------
    ValueError
        If the attribute is not archived by this extractor.
    """
    self.logger.debug("Check that %s is archived."%attribute)
    # Ask the extractor device whether this attribute is archived at all.
    if self.extractor.IsArchived(attribute):
        return
    message = "Attribute '%s' is not archived in DB %s"%(attribute, self.extractor)
    self.logger.error(message)
    raise ValueError(message)
def _cmd_with_retry(self, cmd, arg, retry=2):
    """
    Run a command on extractor tango proxy, retrying on DevFailed.

    Parameters
    ----------
    cmd : str
        Command to execute on the extractor
    arg : list
        Attribute to pass to the command
    retry : int
        Number of command tries on DevFailed

    Returns
    -------
    cmdreturn :
        Whatever the command returns.
        None if failed after the amount of retries.
    """
    cmdreturn = None
    # BUGFIX: the retry loop header was missing ('i' was referenced but never
    # defined); reconstructed so the command is attempted up to 'retry' times.
    for i in range(retry):
        # Make retrieval request
        self.logger.debug("Execute %s (%s)"%(cmd, arg))
        try:
            cmdreturn = getattr(self.extractor, cmd)(arg)
        except tango.DevFailed as e:
            self.logger.warning("The extractor device returned the following error:")
            self.logger.warning(e)
            if i == retry-1:
                # BUGFIX: use the instance logger (module-level 'logger' was used).
                self.logger.error("Could not execute command %s (%s). Check the device extractor"%(cmd, arg))
                return None
            self.logger.warning("Retrying...")
            continue
        break
    return cmdreturn
def chunkerize(self, attribute, dateStart, dateStop, Nmax=100000):
    """
    Cut the time range into chunks holding at most Nmax data points each.

    Parameters
    ----------
    attribute : String
        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
    dateStart : datetime.datetime
        Start date for extraction.
    dateStop : datetime.datetime
        Stop date for extraction.
    Nmax : int
        Maximum number of data points per chunk.

    Returns
    -------
    cdates : list
        List of datetime giving the limit of each chunks.
        For N chunks, there is N+1 elements in cdates, as the start and end boundaries are included.
    """
    info=self.infoattr(attribute)
    self.logger.debug("Attribute information \n%s"%info)
    # Get the number of points
    N=self.extractor.GetAttDataBetweenDatesCount([
        attribute,
        dateStart.strftime(_DBDFMT2),
        dateStop.strftime(_DBDFMT2)
        ])
    self.logger.debug("On the period, there is %d entries"%N)
    dx=int(info["max_dim_x"])
    if dx > 1:
        # A vector counts dim_x values per entry.
        self.logger.debug("Attribute is a vector with max dimension = %s"%dx)
        N=N*dx
    # If data chunk is too much, we need to cut it
    if N > Nmax:
        dt = (dateStop-dateStart)/(N//Nmax)
        cdates = [dateStart]
        while cdates[-1] < dateStop:
            cdates.append(cdates[-1]+dt)
        # Clamp the last boundary exactly on dateStop.
        cdates[-1] = dateStop
        self.logger.debug("Cutting access to %d little chunks of time, %s each."%(len(cdates)-1, dt))
    else:
        cdates=[dateStart, dateStop]
    # BUGFIX: the chunk boundaries were computed but never returned
    # (callers such as betweenDates() use the return value).
    return cdates
def findattr(self, pattern):
    """
    Search for an attribute path using the pattern given.
    Case insensitive.

    Parameters:
    -----------
    pattern: str
        Pattern to search, wildchar * accepted.
        example "dg*dcct*current"

    Returns:
    --------
    results: (str,)
        List of string match
    """
    # The wildcard simply splits the pattern into mandatory substrings;
    # an attribute matches when it contains every one of them.
    parts = pattern.lower().split('*')
    found = []
    for candidate in self.attr_table:
        lowered = candidate.lower()
        if all(part in lowered for part in parts):
            found.append(candidate)
    return found
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
def infoattr(self, attribute):
    """
    Get informations for an attribute and pack it into a python dict.

    Parameters
    ----------
    attribute : String
        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".

    Returns
    -------
    info : dict
        Dictionnary of propertyname:propertyvalue
    """
    info = dict()
    # Both extractor commands return entries formatted as "key::value".
    for func in ("GetAttDefinitionData", "GetAttPropertiesData"):
        result = getattr(self.extractor, func)(attribute)
        if result is None:
            self.logger.warning("Function %s on extractor returned None"%func)
            continue
        for entry in result:
            fields = entry.split("::")
            info[fields[0]] = fields[1]
    return info