Skip to content
Snippets Groups Projects
Commit e7d2b3ae authored by BRONES Romain's avatar BRONES Romain
Browse files

Merge fixes and feat

* Attribute type casting improved
* Unique entry function "extract"
* Multiexport
* Timedelta
parents dbdea2bf d0fd0633
No related branches found
No related tags found
No related merge requests found
""" """
Python module for extracting attribute from Arhive Extractor Device. Python module for extracting attribute from Archive Extractor Device.
""" """
import logging import logging
import datetime import datetime
import numpy as np import numpy as np
import PyTango as tango import PyTango as tango
import pandas as pd import pandas as pd
import traceback
__version__ = "1.0.1" __version__ = "1.0.1"
...@@ -38,8 +39,16 @@ _DBDFMT2 = "%d-%m-%Y %H:%M:%S" ...@@ -38,8 +39,16 @@ _DBDFMT2 = "%d-%m-%Y %H:%M:%S"
########################################################################## ##########################################################################
# Vectorized fromtimestamp function # Vectorized fromtimestamp function
# NOTE: it is faster than using pandas.to_datetime()
_ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp) _ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)
# Element-wise string -> bool lookup for numpy arrays.
# The words are expected lower-case; the lookup itself is dict.get, which
# yields None for a word that is not in the table.
_ArrayStr2Bool = np.vectorize(
    dict(true=True, t=True, false=False, f=False).get
)
def _check_initialized(): def _check_initialized():
""" """
Check if the module is initialized. Check if the module is initialized.
...@@ -57,14 +66,16 @@ def _check_initialized(): ...@@ -57,14 +66,16 @@ def _check_initialized():
##----------------------------------------------------------------------## ##----------------------------------------------------------------------##
def _dateparse(datestr): def _dateparse(datestr):
""" """
Convenient function to parse date strings. Convenient function to parse date or duration strings.
Global format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise. Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
Duration format is 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
If datstr is None, take the actual date and time. If datstr is None, take the actual date and time.
Parameters Parameters
--------- ---------
datestr : string datestr : string
Date as a string, format %Y-%m-%d-%H:%M:%S or less precise. Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.
Duration as a string, format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
Exceptions Exceptions
---------- ----------
...@@ -73,12 +84,29 @@ def _dateparse(datestr): ...@@ -73,12 +84,29 @@ def _dateparse(datestr):
Returns Returns
------- -------
date : datetime.datetime date : datetime.datetime or datetime.timedelta
Parsed date Parsed date or duration
""" """
logger.debug("Parsing date string '%s'"%datestr)
# Determine date/duration by looking at the last char
if datestr[-1] in "mhdM":
# Duration
logger.debug("Assuming a duration")
try:
q=float(datestr[:-1])
except ValueError as e:
logger.error("Failed to parse date string. Given the last character, a duration was assumed.")
raise Exception("Could not parse argument to a date") from e
if datestr is None: # Convert all in minutes
return datetime.datetime.now() minutes = q*{'m':1, 'h':60, 'd':60*24, 'm':30*60*24}[datestr[-1]]
return datetime.timedelta(minutes=minutes)
else:
# Probably a date string
# This gives all format that will be tried, in order. # This gives all format that will be tried, in order.
# Stop on first parse success. Raise error if none succeed. # Stop on first parse success. Raise error if none succeed.
...@@ -204,6 +232,7 @@ def _cmd_with_retry(dp, cmd, arg, retry=2): ...@@ -204,6 +232,7 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
Whatever the command returns. Whatever the command returns.
None if failed after the amount of retries. None if failed after the amount of retries.
""" """
logger.info("Perform Command {} {}".format(cmd, arg))
for i in range(retry): for i in range(retry):
# Make retrieval request # Make retrieval request
...@@ -221,6 +250,48 @@ def _cmd_with_retry(dp, cmd, arg, retry=2): ...@@ -221,6 +250,48 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
break break
return cmdreturn return cmdreturn
def _cast_bool(value):
    """
    Cast a value, or array of values, to boolean.

    Numeric input is converted with a plain numpy cast.
    Text input is first read as integer text ("0", "1", ...). If that fails,
    it is lower-cased, stripped and looked up in the module word table
    (_ArrayStr2Bool).

    Parameters:
    -----------
    value: string, integer, or array of such
        value to convert.

    Return:
    boolean:
        value or array of boolean.
        A scalar input gives back a scalar, an array input gives an array.
    """
    # Force to array
    value = np.asarray(value)

    # cast back to single value
    def castback(v):
        if v.shape == ():
            return v.item()
        return v

    # Simply try to cast to bool first.
    # NOTE: text must not be cast straight to bool. Since NumPy 2.0 that cast
    # follows Python truthiness (every non-empty string, "false" included,
    # becomes True) and never raises. Going through an integer parse keeps
    # the NumPy 1.x behaviour on every version: "0"/"1" convert directly,
    # words raise ValueError and fall through to the word table below.
    try:
        if value.dtype.kind in ("U", "S"):
            converted = value.astype(np.int64).astype("bool")
        else:
            converted = value.astype("bool")
    except ValueError:
        # Keep trying to cast
        pass
    else:
        logger.debug("Direct conversion to boolean")
        return castback(converted)

    logger.debug("Try to convert to boolean")
    value = np.char.strip(np.char.lower(value))
    value = _ArrayStr2Bool(value)
    return castback(value)
########################################################################## ##########################################################################
### Module private variables ### ### Module private variables ###
########################################################################## ##########################################################################
...@@ -243,6 +314,8 @@ def init( ...@@ -243,6 +314,8 @@ def init(
Initialize the module. Initialize the module.
Instanciate tango.DeviceProxy for extractors (TDB and HDB) Instanciate tango.DeviceProxy for extractors (TDB and HDB)
Parameters:
-----------
HdbExtractorPath, TdbExtractorPath: string HdbExtractorPath, TdbExtractorPath: string
Tango path to the extractors. Tango path to the extractors.
...@@ -274,6 +347,123 @@ def init( ...@@ -274,6 +347,123 @@ def init(
_AttrTables = tuple(e.getattnameall() for e in _extractors) _AttrTables = tuple(e.getattnameall() for e in _extractors)
logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1]))) logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))
##########################################################################
### Module access functions ###
##########################################################################
def extract(
        attr,
        date1, date2=None,
        method="nearest",
        db='H',
        ):
    """
    Access function to perform extraction between date1 and date2.
    Can extract one or several attributes.
    date1 and date2 can be both exact date, or one of two can be a time interval that will be taken relative to the other.

    Parameters:
    -----------
    attr: string, list, dict
        Attribute(s) to extract.
        If string, extract the given attribute and return the result.
        If list or tuple, extract attributes and return a list of results.
        If a dict, extract attributes and return a dict of results with same keys.

    date1, date2: string, datetime.datetime, datetime.timedelta, None
        Exact date, or duration relative to the other date.
        If string, it will be parsed.
        A start date can be given with string format '%Y-%m-%d-%H:%M:%S' or less precise (ie '2021-02', '2022-11-03', '2022-05-10-21:00'...).
        A duration can be given with string format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
        A datetime.datetime object or datetime.timedelta object (or an instance of a subclass) will be used as is.
        date2 can be None. In that case it is replaced by the current time.

    method: str
        Method of extraction
            'nearest': Retrieve nearest value of date1, date2 is ignored.
            'between': Retrieve data between date1 and date2.
            'minmaxmean': accepted by this function; whether it is supported
                is decided by the per-attribute extraction routine.

    db: str
        Which database to look in, 'H' or 'T'.

    Exceptions
    ----------
    ValueError
        'db' or 'method' is not an allowed value, neither date is an exact
        date, or date1 is after date2.

    Returns
    -------
    None if the module is not initialized.
    For a single attribute: the extraction result, or None if it failed.
    For a list/tuple: list of results; attributes that failed are logged and left out.
    For a dict: dict of results; keys whose attribute failed are logged and left out.
    """

    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    #    Perform a few sanity checks
    if not _check_initialized():
        # Stop here, the function has produced a message if necessary
        return

    if db not in ("H", "T"):
        raise ValueError("Attribute 'db' should be 'H' or 'T'")

    allowedmethods = ("nearest", "between", "minmaxmean")
    if method not in allowedmethods:
        raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods)))

    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    #     Work with dates
    # isinstance (rather than an exact type match) lets subclasses of
    # datetime/timedelta through without being sent to the string parser.
    datelike = (datetime.datetime, datetime.timedelta)

    if not isinstance(date1, datelike):
        date1 = _dateparse(date1)

    if date2 is None:
        date2 = datetime.datetime.now()
    elif not isinstance(date2, datelike):
        date2 = _dateparse(date2)

    if not (isinstance(date1, datetime.datetime) or isinstance(date2, datetime.datetime)):
        logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2))
        raise ValueError("date1 and date2 not valid")

    # Use timedelta relative to the other date. date1 is always before date2
    if isinstance(date1, datetime.timedelta):
        date1 = date2 - date1
    if isinstance(date2, datetime.timedelta):
        date2 = date1 + date2

    if date1 > date2:
        logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2))
        raise ValueError("date1 and date2 not valid")

    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    #     Perform extraction and return

    def _try_extract(name):
        # Extract one attribute, best effort: any failure is logged and
        # reported through the first element of the returned (ok, result).
        try:
            return True, _extract_attribute(name, method, date1, date2, db)
        except Exception as e:
            logger.debug("Exception in _extract_attribute(): "+str(e))
            # format_tb returns the traceback text; print_tb would write it
            # to stderr and return None, which is what used to get logged.
            logger.debug("".join(traceback.format_tb(e.__traceback__)))
            logger.error("Could not extract {}.".format(name))
            return False, None

    if isinstance(attr, dict):
        results = dict()
        for key, name in attr.items():
            ok, res = _try_extract(name)
            if ok:
                results[key] = res
        return results

    if isinstance(attr, (list, tuple)):
        results = []
        for name in attr:
            ok, res = _try_extract(name)
            if ok:
                results.append(res)
        return results

    ok, res = _try_extract(attr)
    if not ok:
        return None
    return res
##----------------------------------------------------------------------## ##----------------------------------------------------------------------##
def findattr(pattern, db="H"): def findattr(pattern, db="H"):
...@@ -349,65 +539,18 @@ def infoattr(attribute, db='H'): ...@@ -349,65 +539,18 @@ def infoattr(attribute, db='H'):
return info return info
##---------------------------------------------------------------------------## ##########################################################################
def ExtrBetweenDates( ### Module core functions ###
attribute, ##########################################################################
dateStart,
dateStop=None,
db='H',
):
"""
Query attribute data from an archiver database, get all points between dates.
Use ExtractBetweenDates.
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
dateStart : datetime.datetime, string
Start date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
dateStop : datetime.datetime, string, None
Stop date for extraction.
If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
If None, it takes the current date and time.
Default is None (now).
db: str
Which database to look in, 'H' or 'T'.
Exceptions
----------
ValueError
The attribute is not found in the database.
Returns
-------
[date, value] : array
date : numpy.ndarray of datetime.datime objects
Dates of the values
value : numpy.ndarray
Archived values
def _extract_attribute(attribute, method, date1, date2, db):
"""
Check if exists, check scalar or spectrum and dispatch
""" """
if not _check_initialized():
return
if not db in ("H", "T"):
raise AttributeError("Attribute db should be 'H' or 'T'")
# Uncapitalize attribute # Uncapitalize attribute
attribute = attribute.lower() attribute = attribute.lower()
_check_attribute(attribute, db)
# Check attribute is in database
_check_attribute(attribute, db=db)
# Parse dates
dateStart = _dateparse(dateStart)
dateStop = _dateparse(dateStop)
# Get info about the attribute # Get info about the attribute
info=infoattr(attribute, db=db) info=infoattr(attribute, db=db)
...@@ -425,63 +568,132 @@ def ExtrBetweenDates( ...@@ -425,63 +568,132 @@ def ExtrBetweenDates(
attribute, info["max_dim_x"])) attribute, info["max_dim_x"]))
attrtype="vector" attrtype="vector"
# Cut the time horizon in chunks
cdates = _chunkerize(attribute, dateStart, dateStop, db)
# Arrays to hold every chunks
value = []
date = []
# For each date chunk
for i_d in range(len(cdates)-1):
# ============= # =============
# For now we handle multi dimension the same way as scalar, which will get only the first element # For now we handle multi dimension the same way as scalar, which will get only the first element
if (attrtype=="scalar") or (attrtype=="multi"): if (attrtype=="scalar") or (attrtype=="multi"):
# Inform on retrieval request if info["data_type"] == '1':
logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%( # Boolean data type, quick fix
dtype=bool
else:
dtype=float
return _extract_scalar(attribute, method, date1, date2, db, dtype)
if attrtype=="vector":
return _extract_vector(attribute, method, date1, date2, db)
##---------------------------------------------------------------------------##
def _extract_scalar(attribute, method, date1, date2, db, dtype):
# =====================
if method == "nearest":
cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetNearestValue", [
attribute, attribute,
cdates[i_d].strftime(_DBDFMT), date1.strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT)) ])
)
# Unpack return
try:
_date, _value = cmdreturn.split(';')
except TypeError:
logger.error("Could not extract this chunk. Check the device extractor")
return None
# Transform by datatype
if dtype is bool:
_value = _cast_bool(_value)
# Fabricate return pandas.Series
d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[_value,], name=attribute)
return d
# =====================
if method == "between":
# Cut the time horizon in chunks
cdates = _chunkerize(attribute, date1, date2, db)
# Array to hold data
data = []
# For each date chunk
for i_d in range(len(cdates)-1):
cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [ cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [
attribute, attribute,
cdates[i_d].strftime(_DBDFMT), cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT) cdates[i_d+1].strftime(_DBDFMT)
]) ])
# Check command return
if cmdreturn is None:
logger.error("Could not extract this chunk. Check the device extractor")
return None
# Unpack return # Unpack return
try:
_date, _value = cmdreturn _date, _value = cmdreturn
except TypeError:
logger.error("Could not extract this chunk. Check the device extractor")
return None
# Transform to datetime - value arrays # Transform to datetime - value arrays
# NOTE: it is faster than using pandas.to_datetime() if dtype is bool:
_value = np.asarray(_value, dtype=float) _value = _cast_bool(_value)
else:
_value = np.asarray(_value, dtype=dtype)
if len(_date) > 0: if len(_date) > 0:
_date = _ArrayTimeStampToDatetime(_date/1000.0) _date = _ArrayTimeStampToDatetime(_date/1000.0)
value.append(_value) # Fabricate return pandas.Series
date.append(_date) data.append(pd.Series(index=_date, data=_value, name=attribute))
# ============= # Concatenate chunks
if attrtype=="vector": return pd.concat(data)
logger.info("Perform GetAttDataBetweenDates (%s, %s, %s)"%(
# ========================
if method == "minmaxmean":
# If we are here, the method is not implemented
logger.error("Method {} is not implemented for scalars.".format(method))
raise NotImplemented
##---------------------------------------------------------------------------##
def _extract_vector(attribute, method, date1, date2, db):
# Get info about the attribute
info=infoattr(attribute, db=db)
# =====================
if method == "nearest":
# Get nearest does not work with vector.
# Make a between date with surounding dates.
# Dynamically find surounding
cnt=0
dt=datetime.timedelta(seconds=10)
while cnt<1:
logger.debug("Seeking points in {} to {}".format(date1-dt,date1+dt))
cnt=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
attribute, attribute,
cdates[i_d].strftime(_DBDFMT), (date1-dt).strftime(_DBDFMT2),
cdates[i_d+1].strftime(_DBDFMT) (date1+dt).strftime(_DBDFMT2)
)) ])
dt=dt*1.5
logger.debug("Found {} points in a +- {} interval".format(cnt,str(dt/1.5)))
[N,], [name,] = _extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDates([
# For vector, we have to use the GetAttxxx commands
cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [
attribute, attribute,
cdates[i_d].strftime(_DBDFMT), (date1-dt).strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT) (date1+dt).strftime(_DBDFMT),
]) ])
# Unpack return
try:
[N,], [name,] = cmdreturn
N=int(N) N=int(N)
except TypeError:
logger.error("Could not extract this attribute. Check the device extractor")
return None
# Read the history # Read the history
logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name)) logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
...@@ -495,23 +707,18 @@ def ExtrBetweenDates( ...@@ -495,23 +707,18 @@ def ExtrBetweenDates(
_value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
_date[i_h]=attrHist[i_h].time.todatetime() _date[i_h]=attrHist[i_h].time.todatetime()
# Remove dynamic attribute # Seeking nearest entry
logger.debug("Remove dynamic attribute %s."%name) idx=np.argmin(abs(_date-date1))
_extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name) logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx]))
value.append(_value) # Fabricate return pandas.Series
date.append(_date) d=pd.Series(index=[_date[idx],], data=[_value[idx],], name=attribute)
logger.debug("Concatenate chunks") return d
value = np.concatenate(value)
date = np.concatenate(date)
logger.debug("Extraction done for %s."%attribute) # If we are here, the method is not implemented
if attrtype=="vector": logger.error("Method {} is not implemented for vectors.".format(method))
return pd.DataFrame(index=date, data=value).dropna(axis=1, how='all') raise NotImplemented
else:
return pd.Series(index=date, data=value)
##---------------------------------------------------------------------------## ##---------------------------------------------------------------------------##
...@@ -644,3 +851,7 @@ def ExtrBetweenDates_MinMaxMean( ...@@ -644,3 +851,7 @@ def ExtrBetweenDates_MinMaxMean(
"Max":value_max, "Max":value_max,
},) },)
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Initialize on import
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
init()
...@@ -13,12 +13,9 @@ Usage example, with an ipython prompt ...@@ -13,12 +13,9 @@ Usage example, with an ipython prompt
```python ```python
In [1]: import pySoleilControl.ArchiveExtractor as AE In [1]: import pySoleilControl.ArchiveExtractor as AE
In [2]: # For now, we need manual initialization of the module In [2]: # Looking for an attribute in HDB
...: AE.init()
In [3]: # Looking for an attribute in HDB
...: AE.findattr("ans/dg/*dcct*") ...: AE.findattr("ans/dg/*dcct*")
Out[3]: Out[2]:
['ANS/DG/DCCT-CTRL/State', ['ANS/DG/DCCT-CTRL/State',
'ANS/DG/DCCT-CTRL/Status', 'ANS/DG/DCCT-CTRL/Status',
'ANS/DG/DCCT-CTRL/current', 'ANS/DG/DCCT-CTRL/current',
...@@ -26,10 +23,10 @@ Out[3]: ...@@ -26,10 +23,10 @@ Out[3]:
'ANS/DG/DCCT-CTRL/lifeTime', 'ANS/DG/DCCT-CTRL/lifeTime',
'ANS/DG/DCCT-CTRL/lifeTimeErr'] 'ANS/DG/DCCT-CTRL/lifeTimeErr']
In [4]: # Get data between two dates, this return a pandas.Dataframe object In [3]: # Get data between two dates, this return a pandas.Dataframe object
...: AE.ExtrBetweenDates('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00') ...: AE.ExtrBetweenDates('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00')
INFO:Perform ExtractBetweenDates (ans/dg/dcct-ctrl/current, 2021-12-13 00:00:00, 2021-12-13 12:00:00) INFO:Perform ExtractBetweenDates (ans/dg/dcct-ctrl/current, 2021-12-13 00:00:00, 2021-12-13 12:00:00)
Out[4]: Out[3]:
2021-12-13 00:00:00 450.993568 2021-12-13 00:00:00 450.993568
2021-12-13 00:00:01 450.981979 2021-12-13 00:00:01 450.981979
2021-12-13 00:00:02 450.971455 2021-12-13 00:00:02 450.971455
...@@ -43,11 +40,11 @@ Out[4]: ...@@ -43,11 +40,11 @@ Out[4]:
2021-12-13 12:00:00 15.005410 2021-12-13 12:00:00 15.005410
Length: 42725, dtype: float64 Length: 42725, dtype: float64
In [5]: # Get min, max and mean with a 10 minute window In [4]: # Get min, max and mean with a 10 minute window
...: d=AE.ExtrBetweenDates_MinMaxMean('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00', timeInterval='10m') ...: d=AE.ExtrBetweenDates_MinMaxMean('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00', timeInterval='10m')
In [6]: d In [5]: d
Out[6]: Out[5]:
Min Mean Max Min Mean Max
2021-12-13 00:05:00 449.762286 450.619654 451.617095 2021-12-13 00:05:00 449.762286 450.619654 451.617095
2021-12-13 00:15:00 449.761171 450.676306 451.595391 2021-12-13 00:15:00 449.761171 450.676306 451.595391
...@@ -64,14 +61,14 @@ Out[6]: ...@@ -64,14 +61,14 @@ Out[6]:
[72 rows x 3 columns] [72 rows x 3 columns]
In [7]: # Activate inline matplotlib In [6]: # Activate inline matplotlib
...: %matplotlib ...: %matplotlib
Using matplotlib backend: TkAgg Using matplotlib backend: TkAgg
In [7]: # Simply plot In [6]: # Simply plot
...: d.plot() ...: d.plot()
In [8]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?' In [7]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?'
...: AE.ExtrBetweenDates? ...: AE.ExtrBetweenDates?
Signature: AE.ExtrBetweenDates(attribute, dateStart, dateStop=None, db='H') Signature: AE.ExtrBetweenDates(attribute, dateStart, dateStop=None, db='H')
Docstring: Docstring:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment