Newer
Older
"""
Python module for extracting attribute from Arhive Extractor Device.
"""
import logging
import datetime
import numpy as np
import PyTango as tango
__version__ = "1.0.1"
##########################################################################
### Install logger for the module ###
##########################################################################
logger = logging.getLogger(__name__)
#logger.setLevel(getattr(logging, logger.upper()))
if not logger.hasHandlers():
# No handlers, create one
sh = logging.StreamHandler()
sh.setLevel(logger.level)
sh.setFormatter(logging.Formatter("%(levelname)s:%(message)s"))
logger.addHandler(sh)
##########################################################################
### Commodity private variables ###
##########################################################################
_DBDFMT = "%Y-%m-%d %H:%M:%S"
_DBDFMT2 = "%d-%m-%Y %H:%M:%S"
##########################################################################
### Commodity private functions ###
##########################################################################
# NOTE: it is faster than using pandas.to_datetime()
_ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)
Check if the module is initialized.
Returns
-------
success : boolean
"""
global _extractors
if None in _extractors:
logger.error("Module {0} is not initialied. You should run {0}.init().".format(__name__))
##----------------------------------------------------------------------##
Convenient function to parse date or duration strings.
Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
Duration format is 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
Parameters
---------
datestr : string
Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.
Duration as a string, format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
Exceptions
----------
ValueError
If the parsing failed.
Returns
-------
date : datetime.datetime or datetime.timedelta
Parsed date or duration
logger.debug("Parsing date string '%s'"%datestr)
# Determine date/duration by looking at the last char
if datestr[-1] in "mhdM":
# Duration
logger.debug("Assuming a duration")
q=float(datestr[:-1])
except ValueError as e:
logger.error("Failed to parse date string. Given the last character, a duration was assumed.")
raise Exception("Could not parse argument to a date") from e
# Convert all in minutes
minutes = q*{'m':1, 'h':60, 'd':60*24, 'm':30*60*24}[datestr[-1]]
return datetime.timedelta(minutes=minutes)
# Probably a date string
# This gives all format that will be tried, in order.
# Stop on first parse success. Raise error if none succeed.
fmt = [
"%Y-%m-%d-%H:%M:%S",
"%Y-%m-%d-%H:%M",
"%Y-%m-%d-%H",
"%Y-%m-%d",
"%Y-%m",
]
date = None
for f in fmt:
try:
date = datetime.datetime.strptime(datestr, f)
except ValueError:
continue
else:
break
else:
raise ValueError("Could not parse argument to a date")
return date
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
##----------------------------------------------------------------------##
def _check_attribute(attribute, db):
"""
Check that the attribute is in the database
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
db: str
Which database to look in, 'H' or 'T'.
"""
global _extractors
logger.debug("Check that %s is archived."%attribute)
if not _extractors[{'H':0, 'T':1}[db]].IsArchived(attribute):
logger.error("Attribute '%s' is not archived in DB %s"%(attribute, _extractors[{'H':0, 'T':1}[db]]))
raise ValueError("Attribute '%s' is not archived in DB %s"%(attribute, _extractors[{'H':0, 'T':1}[db]]))
##----------------------------------------------------------------------##
def _chunkerize(attribute, dateStart, dateStop, db, Nmax=100000):
"""
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
dateStart : datetime.datetime
Start date for extraction.
dateStop : datetime.datetime
Stop date for extraction.
db: str
Which database to look in, 'H' or 'T'.
Nmax: int
Max number of atoms in one chunk. Default 100000.
Returns
-------
cdates : list
List of datetime giving the limit of each chunks.
For N chunks, there is N+1 elements in cdates, as the start and end boundaries are included.
"""
info=infoattr(attribute, db=db)
logger.debug("Attribute information \n%s"%info)
# Get the number of points
N=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
attribute,
dateStart.strftime(_DBDFMT2),
dateStop.strftime(_DBDFMT2)
])
logger.debug("On the period, there is %d entries"%N)
dx=int(info["max_dim_x"])
if dx > 1:
logger.debug("Attribute is a vector with max dimension = %s"%dx)
N=N*dx
# If data chunk is too much, we need to cut it
if N > Nmax:
dt = (dateStop-dateStart)/(N//Nmax)
cdates = [dateStart]
while cdates[-1] < dateStop:
cdates.append(cdates[-1]+dt)
cdates[-1] = dateStop
logger.debug("Cutting access to %d little chunks of time, %s each."%(len(cdates)-1, dt))
else:
cdates=[dateStart, dateStop]
return cdates
##----------------------------------------------------------------------##
def _cmd_with_retry(dp, cmd, arg, retry=2):
"""
Run a command on tango.DeviceProxy, retrying on DevFailed.
Parameters
----------
dp: tango.DeviceProxy
Device proxy to try command onto.
cmd : str
Command to executte on the extractor
arg : list
Attribute to pass to the command
retry : int
Number of command retry on DevFailed
Returns
-------
cmdreturn :
Whatever the command returns.
None if failed after the amount of retries.
"""
logger.info("Perform Command {} {}".format(cmd, arg))
for i in range(retry):
# Make retrieval request
logger.debug("Execute %s (%s)"%(cmd, arg))
try:
cmdreturn = getattr(dp, cmd)(arg)
except tango.DevFailed as e:
logger.warning("The extractor device returned the following error:")
logger.warning(e)
if i == retry-1:
logger.error("Could not execute command %s (%s). Check the device extractor"%(cmd, arg))
return None
logger.warning("Retrying...")
continue
break
return cmdreturn
##########################################################################
### Module private variables ###
##########################################################################
# Tuple of extractor for HDB and TDB
_extractors = (None, None)
# Tuple for attribute tables
_AttrTables = (None, None)
##########################################################################
### Module initialisation functions ###
##########################################################################
def init(
HdbExtractorPath="archiving/hdbextractor/2",
TdbExtractorPath="archiving/tdbextractor/2",
):
"""
Initialize the module.
Instanciate tango.DeviceProxy for extractors (TDB and HDB)
Parameters:
-----------
HdbExtractorPath, TdbExtractorPath: string
Tango path to the extractors.
loglevel: string
loglevel to pass to logging.Logger
try:
logger.setLevel(getattr(logging, loglevel.upper()))
except AttributeError:
logger.error("Wrong log level specified: {}".format(loglevel.upper()))
logger.debug("Instanciating extractors device proxy...")
_extractors = (tango.DeviceProxy(HdbExtractorPath), tango.DeviceProxy(TdbExtractorPath))
logger.debug("{} and {} instanciated.".format(*_extractors))
logger.debug("Configuring extractors device proxy...")
for e in _extractors:
# set timeout to 3 sec
e.set_timeout_millis(3000)
logger.debug("Filling attributes lookup tables...")
_AttrTables = tuple(e.getattnameall() for e in _extractors)
logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
##########################################################################
### Module access functions ###
##########################################################################
def extract(
attr,
date1, date2=None,
method="nearest",
db='H',
):
"""
Access function to perform extraction between date1 and date2.
Can extract one or several attributes.
date1 and date2 can be both exact date, or one of two can be a time interval that will be taken relative to the other.
Parameters:
-----------
attr: string, list, dict
Attribute(s) to extract.
If string, extract the given attribute, returning a pandas.Series.
If list, extract attributes and return a list of pandas.Series.
If a dict, extract attributes and return a dict of pandas.Series with same keys.
date1, date2: string, datetime.datetime, datetime.timedelta, None
Exact date, or duration relative to date2.
If string, it will be parsed.
A start date can be given with string format '%Y-%m-%d-%H:%M:%S' or less precise (ie '2021-02', '2022-11-03' '2022-05-10-21:00'.i..).
A duration can be given with string format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
A datetime.datetime object or datetime.timedelta object will be used as is.
date2 can be None. In that case it is replaced by the current time.
method: str
Method of extraction
'nearest': Retrieve nearest value of date1, date2 is ignored.
'between': Retrive data between date1 and date2.
db: str
Which database to look in, 'H' or 'T'.
"""
## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
# Perform a few sanity checks
if not _check_initialized():
# Stop here, the function has produced a message if necessary
return
if not db in ("H", "T"):
raise ValueError("Attribute 'db' should be 'H' or 'T'")
allowedmethods=("nearest", "between", "minmaxmean")
if not method in allowedmethods:
raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods)))
## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
# Work with dates
if not type(date1) in (datetime.datetime, datetime.timedelta):
date1 = _dateparse(date1)
if date2 is None:
date2 = datetime.datetime.now()
else:
if not type(date2) in (datetime.datetime, datetime.timedelta):
date2 = _dateparse(date2)
if not datetime.datetime in (type(date1), type(date2)):
logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2))
raise ValueError("date1 and date2 not valid")
# Use timedelta relative to the other date. date1 is always before date2
if type(date1) is datetime.timedelta:
date1 = date2-date1
if type(date2) is datetime.timedelta:
date2 = date1+date2
if date1 > date2:
logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2))
raise ValueError("date1 and date2 not valid")
## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
# Perform extraction and return
if type(attr) is dict:
d=dict()
for k,v in attr.items():
try:
d.update({k:_extract_attribute(v, method, date1, date2, db)})
except Exception as e:
logger.debug("Exception in _extract_attribute(): "+str(e))
logger.debug(traceback.print_tb(e.__traceback__))
logger.error("Could not extract {}.".format(v))
return d
if type(attr) in (list,tuple):
d=[]
for v in attr:
try:
d.append(_extract_attribute(v, method, date1, date2, db))
except Exception as e:
logger.debug("Exception in _extract_attribute(): "+str(e))
logger.debug(traceback.print_tb(e.__traceback__))
logger.error("Could not extract {}.".format(v))
return d
try:
d=_extract_attribute(attr, method, date1, date2, db)
except Exception as e:
logger.debug("Exception in _extract_attribute(): "+str(e))
logger.debug(traceback.print_tb(e.__traceback__))
logger.error("Could not extract {}.".format(attr))
return None
return d
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
##----------------------------------------------------------------------##
def findattr(pattern, db="H"):
"""
Search for an attribute path using the pattern given.
Case insensitive.
Parameters:
-----------
pattern: str
Pattern to search, wildchar * accepted.
example "dg*dcct*current"
db: str
Which database to look in, 'H' or 'T'.
Returns:
--------
results: (str,)
List of string match
"""
if not _check_initialized():
return
if not db in ("H", "T"):
raise AttributeError("Attribute db should be 'H' or 'T'")
global _AttrTables
keywords=pattern.lower().split('*')
# Select DB
attr_table = _AttrTables[{'H':0, 'T':1}[db]]
matches = [attr for attr in attr_table if all(k in attr.lower() for k in keywords)]
return matches
##----------------------------------------------------------------------##
def infoattr(attribute, db='H'):
"""
Get informations for an attribute and pack it into a python dict.
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
db: str
Which database to look in, 'H' or 'T'.
Returns
-------
info : dict
Dictionnary of propertyname:propertyvalue
"""
if not _check_initialized():
return
if not db in ("H", "T"):
raise AttributeError("Attribute db should be 'H' or 'T'")
info = dict()
for func in ("GetAttDefinitionData", "GetAttPropertiesData"):
R=getattr(_extractors[{'H':0, 'T':1}[db]], func)(attribute)
if not R is None:
for i in R:
_s=i.split("::")
info[_s[0]]=_s[1]
else:
logger.warning("Function %s on extractor returned None"%func)
return info
##########################################################################
### Module core functions ###
##########################################################################
def _extract_attribute(attribute, method, date1, date2, db):
"""
Check if exists, check scalar or spectrum and dispatch
# Uncapitalize attribute
attribute = attribute.lower()
# Get info about the attribute
info=infoattr(attribute, db=db)
logger.debug("Attribute information \n%s"%info)
# Detect spectrum
attrtype="scalar"
if int(info["max_dim_x"]) > 1:
if int(info["max_dim_y"]) > 0:
logger.warning("Attribute %s is a (%s; %s) vector. This is poorly handled by this module."%(
attribute, info["max_dim_x"], info["max_dim_y"]))
attrtype="multi"
else:
logger.info("Attribute %s is a 1D vector, dimension = %s."%(
attribute, info["max_dim_x"]))
attrtype="vector"
# =============
# For now we handle multi dimension the same way as scalar, which will get only the first element
if (attrtype=="scalar") or (attrtype=="multi"):
return _extract_scalar(attribute, method, date1, date2, db)
if attrtype=="vector":
return _extract_vector(attribute, method, date1, date2, db)
##---------------------------------------------------------------------------##
def _extract_scalar(attribute, method, date1, date2, db):
# =====================
if method == "nearest":
cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetNearestValue", [
attribute,
date1.strftime(_DBDFMT),
])
except TypeError:
logger.error("Could not extract this chunk. Check the device extractor")
return None
# Fabricate return pandas.Series
d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[float(_value),], name=attribute)
return d
# =====================
if method == "between":
# Cut the time horizon in chunks
cdates = _chunkerize(attribute, date1, date2, db)
# Array to hold data
data = []
# For each date chunk
for i_d in range(len(cdates)-1):
cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [
attribute,
cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT)
])
# Unpack return
try:
_date, _value = cmdreturn
except TypeError:
logger.error("Could not extract this chunk. Check the device extractor")
return None
# Transform to datetime - value arrays
_value = np.asarray(_value, dtype=float)
if len(_date) > 0:
_date = _ArrayTimeStampToDatetime(_date/1000.0)
data.append(pd.Series(index=_date, data=_value,name=attribute))
# Concatenate chunks
return pd.concat(data)
# ========================
if method == "minmaxmean":
# If we are here, the method is not implemented
logger.error("Method {} is not implemented for scalars.".format(method))
raise NotImplemented
##---------------------------------------------------------------------------##
def _extract_vector(attribute, method, date1, date2, db):
# Get info about the attribute
info=infoattr(attribute, db=db)
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
# =====================
if method == "nearest":
# Get nearest does not work with vector.
# Make a between date with surounding dates.
# Dynamically find surounding
cnt=0
dt=datetime.timedelta(seconds=10)
while cnt<1:
logger.debug("Seeking points in {} to {}".format(date1-dt,date1+dt))
cnt=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
attribute,
(date1-dt).strftime(_DBDFMT2),
(date1+dt).strftime(_DBDFMT2)
])
dt=dt*1.5
logger.debug("Found {} points in a +- {} interval".format(cnt,str(dt/1.5)))
# For vector, we have to use the GetAttxxx commands
cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [
attribute,
(date1-dt).strftime(_DBDFMT),
(date1+dt).strftime(_DBDFMT),
])
# Unpack return
try:
[N,], [name,] = cmdreturn
N=int(N)
except TypeError:
logger.error("Could not extract this attribute. Check the device extractor")
return None
# Read the history
logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
attrHist = _extractors[{'H':0, 'T':1}[db]].attribute_history(name, N)
# Transform to datetime - value arrays
_value = np.empty((N, int(info["max_dim_x"])), dtype=float)
_value[:] = np.nan
_date = np.empty(N, dtype=object)
for i_h in range(N):
_value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
_date[i_h]=attrHist[i_h].time.todatetime()
# Seeking nearest entry
idx=np.argmin(abs(_date-date1))
logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx]))
# Fabricate return pandas.Series
d=pd.Series(index=[_date[idx],], data=[_value[idx],], name=attribute)
return d
# If we are here, the method is not implemented
logger.error("Method {} is not implemented for vectors.".format(method))
raise NotImplemented
##---------------------------------------------------------------------------##
def ExtrBetweenDates_MinMaxMean(
attribute,
dateStart,
timeInterval=datetime.timedelta(seconds=60),
db='H',
):
"""
Query attribute data from an archiver database, get all points between dates.
Use ExtractBetweenDates.
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
dateStart : datetime.datetime, string
Start date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
dateStop : datetime.datetime, string
Stop date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
Default is now (datetime.datetime.now())
timeInterval: datetime.timedelta, string
Time interval used to perform min,max and mean.
Can be a string with a number and a unit in "d", "h", "m" or "s"
Exceptions
----------
ValueError
The attribute is not found in the database.
Returns
-------
[mdates, value_min, value_max, value_mean] : array
mdates : numpy.ndarray of datetime.datime objects
Dates of the values, middle of timeInterval windows
value_min : numpy.ndarray
Minimum of the value on the interval
value_max : numpy.ndarray
Maximum of the value on the interval
value_mean : numpy.ndarray
Mean of the value on the interval
if not db in ("H", "T"):
raise AttributeError("Attribute db should be 'H' or 'T'")
# Uncapitalize attribute
attribute = attribute.lower()
# Check attribute is in database
_check_attribute(attribute, db=db)
# Parse dates
dateStart = _dateparse(dateStart)
dateStop = _dateparse(dateStop)
# Parse timeInterval if string
if type(timeInterval) is str:
try:
mul = {'s':1, 'm':60, 'h':60*60, 'd':60*60*24}[timeInterval[-1]]
except KeyError:
logger.error("timeInterval could not be parsed")
raise ValueError("timeInterval could not be parsed")
timeInterval= datetime.timedelta(seconds=int(timeInterval[:-1])*mul)
info=infoattr(attribute)
logger.debug("Attribute information \n%s"%info)
# Detect spectrum
attrtype="scalar"
if int(info["max_dim_x"]) > 1:
logger.error("Attribute is not a scalar. Cannot perform this kind of operation.")
return None
# Cut data range in time chunks
cdates = [dateStart]
while cdates[-1] < dateStop:
cdates.append(cdates[-1]+timeInterval)
cdates[-1] = dateStop
mdates = np.asarray(cdates[:-1])+timeInterval/2
logger.debug("Cutting time range to %d chunks of time, %s each."%(len(cdates)-1, timeInterval))
# Prepare arrays
value_min = np.empty(len(cdates)-1)
value_max = np.empty(len(cdates)-1)
value_mean = np.empty(len(cdates)-1)
# For each time chunk
for i_d in range(len(cdates)-1):
for func, arr in zip(
["Max", "Min", "Avg"],
[value_max, value_min, value_mean],
):
# Make requests
logger.debug("Perform GetAttData%sBetweenDates (%s, %s, %s)"%(
func,
attribute,
cdates[i_d].strftime(_DBDFMT2),
cdates[i_d+1].strftime(_DBDFMT2))
)
_val =getattr(_extractors[{'H':0, 'T':1}[db]], "GetAttData%sBetweenDates"%func)([
attribute,
cdates[i_d].strftime(_DBDFMT2),
cdates[i_d+1].strftime(_DBDFMT2)
])
logger.debug("Extraction done for %s."%attribute)
return pd.DataFrame(
index=mdates,
data={
"Min":value_min,
"Mean":value_mean,
"Max":value_max,
},)
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Initialize on import
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
init()