Skip to content
Snippets Groups Projects
Commit e7d2b3ae authored by BRONES Romain's avatar BRONES Romain
Browse files

Merge fixes and feat

* Attribute type casting improved
* Unique entry function "extract"
* Multiexport
* Timedelta
parents dbdea2bf d0fd0633
No related branches found
No related tags found
No related merge requests found
"""
Python module for extracting attribute from Arhive Extractor Device.
Python module for extracting attribute from Archive Extractor Device.
"""
import logging
import datetime
import numpy as np
import PyTango as tango
import pandas as pd
import traceback
__version__ = "1.0.1"
......@@ -38,8 +39,16 @@ _DBDFMT2 = "%d-%m-%Y %H:%M:%S"
##########################################################################
# Element-wise version of datetime.datetime.fromtimestamp for arrays of timestamps.
# NOTE: it is faster than using pandas.to_datetime()
_ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)

# Element-wise lookup of boolean words ("true"/"t", "false"/"f").
# The lookup is dict.get, which returns None for any other key.
_ArrayStr2Bool = np.vectorize(
    dict(
        true=True, t=True,
        false=False, f=False,
    ).get
)
def _check_initialized():
"""
Check if the module is initialized.
......@@ -57,14 +66,16 @@ def _check_initialized():
##----------------------------------------------------------------------##
def _dateparse(datestr):
    """
    Convenient function to parse date or duration strings.

    Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
    Duration format is 'Xu' where X is a number and u is a unit in
    ('m':minutes, 'h':hours, 'd':days, 'M':months).
    If datestr is None, take the actual date and time.

    Parameters
    ---------
    datestr : string, None
        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.
        Duration as a string, format 'Xu' where X is a number and u is a unit
        in ('m':minutes, 'h':hours, 'd':days, 'M':months).
        None stands for the current date and time.

    Exceptions
    ----------
    ValueError
        The string could be parsed neither as a duration nor as a date.

    Returns
    -------
    date : datetime.datetime or datetime.timedelta
        Parsed date or duration
    """
    logger.debug("Parsing date string '%s'"%datestr)

    # None must be handled before looking at the last character.
    if datestr is None:
        return datetime.datetime.now()

    # Minutes per duration unit. A month is counted as 30 days.
    # 'm' (minutes) and 'M' (months) are distinct keys.
    unit_minutes = {'m': 1, 'h': 60, 'd': 60*24, 'M': 30*60*24}

    # Determine date/duration by looking at the last char.
    # Slicing keeps an empty string from raising IndexError.
    unit = datestr[-1:]
    if unit in unit_minutes:
        # Duration
        logger.debug("Assuming a duration")
        try:
            q = float(datestr[:-1])
        except ValueError as e:
            logger.error("Failed to parse date string. Given the last character, a duration was assumed.")
            raise ValueError("Could not parse argument to a date") from e

        # Convert all in minutes
        return datetime.timedelta(minutes=q*unit_minutes[unit])

    # Probably a date string
    # This gives all format that will be tried, in order.
    # Stop on first parse success. Raise error if none succeed.
    fmt = (
        "%Y-%m-%d-%H:%M:%S",
        "%Y-%m-%d-%H:%M",
        "%Y-%m-%d-%H",
        "%Y-%m-%d",
        "%Y-%m",
        )
    for f in fmt:
        try:
            return datetime.datetime.strptime(datestr, f)
        except ValueError:
            continue

    raise ValueError("Could not parse argument to a date")
##----------------------------------------------------------------------##
def _check_attribute(attribute, db):
......@@ -204,6 +232,7 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
Whatever the command returns.
None if failed after the amount of retries.
"""
logger.info("Perform Command {} {}".format(cmd, arg))
for i in range(retry):
# Make retrieval request
......@@ -221,6 +250,48 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
break
return cmdreturn
def _cast_bool(value):
    """
    Cast a value, or array of values, to boolean.

    Numeric and boolean inputs are converted directly. Strings are first read
    as integers ("0", "1", ...); when that fails they are stripped,
    lower-cased and looked up with _ArrayStr2Bool.

    Parameters:
    -----------
    value: string, integer, or array of such
        value to convert.

    Return:
    boolean:
        value or array of boolean. A 0-dimensional input gives back a single
        Python value instead of an array.
    """
    # Force to array
    value = np.asarray(value)

    # cast back to single value
    def castback(v):
        if v.shape == ():
            return v.item()
        return v

    # Strings must not be cast straight to bool: since NumPy 2.0 every
    # non-empty string is True, so "false" would silently become True.
    # Going through an integer cast gives the same result on every NumPy
    # version: integer strings are accepted, words raise ValueError.
    is_text = value.dtype.kind in ("U", "S")

    # Simply try to cast to bool first
    try:
        if is_text:
            direct = value.astype(np.int64).astype("bool")
        else:
            direct = value.astype("bool")
    except ValueError:
        # Keep trying to cast
        pass
    else:
        logger.debug("Direct conversion to boolean")
        return castback(direct)

    logger.debug("Try to convert to boolean")
    value = np.char.strip(np.char.lower(value))
    value = _ArrayStr2Bool(value)
    return castback(value)
##########################################################################
### Module private variables ###
##########################################################################
......@@ -243,11 +314,13 @@ def init(
Initialize the module.
Instanciate tango.DeviceProxy for extractors (TDB and HDB)
HdbExtractorPath, TdbExtractorPath: string
Tango path to the extractors.
Parameters:
-----------
HdbExtractorPath, TdbExtractorPath: string
Tango path to the extractors.
loglevel: string
loglevel to pass to logging.Logger
loglevel: string
loglevel to pass to logging.Logger
"""
global _extractors
global _AttrTables
......@@ -274,6 +347,123 @@ def init(
_AttrTables = tuple(e.getattnameall() for e in _extractors)
logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))
##########################################################################
### Module access functions ###
##########################################################################
def extract(
        attr,
        date1, date2=None,
        method="nearest",
        db='H',
        ):
    """
    Access function to perform extraction between date1 and date2.
    Can extract one or several attributes.
    date1 and date2 can both be exact dates, or one of the two can be a
    duration that is taken relative to the other.

    Parameters:
    -----------
    attr: string, list, tuple, dict
        Attribute(s) to extract.
        If string, extract the given attribute and return the result.
        If list or tuple, extract each attribute and return a list of results.
        If dict, extract each value and return a dict of results with the same keys.
        An attribute whose extraction fails is logged and left out of the
        returned list or dict.

    date1, date2: string, datetime.datetime, datetime.timedelta, None
        Exact date, or duration relative to the other date.
        If string, it will be parsed.
        A date can be given with string format '%Y-%m-%d-%H:%M:%S' or less
        precise (e.g. '2021-02', '2022-11-03', '2022-05-10-21:00').
        A duration can be given with string format 'Xu' where X is a number
        and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months).
        A datetime.datetime or datetime.timedelta object will be used as is.
        date2 can be None. In that case it is replaced by the current time.

    method: str
        Method of extraction
            'nearest': Retrieve nearest value of date1.
            'between': Retrieve data between date1 and date2.
            'minmaxmean': accepted here and passed on to the extraction functions.
        The dates are validated whatever the method: date1 must not be
        later than date2.

    db: str
        Which database to look in, 'H' or 'T'.

    Exceptions:
    -----------
    ValueError
        db or method is not an allowed value, neither date is an exact date,
        or date1 is later than date2.

    Return:
    -------
    Whatever _extract_attribute returns for each attribute, arranged as
    described for attr. None if the module is not initialized, or if the
    extraction of a single attribute failed.
    """

    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    # Perform a few sanity checks
    if not _check_initialized():
        # Stop here, the function has produced a message if necessary
        return

    if db not in ("H", "T"):
        raise ValueError("Attribute 'db' should be 'H' or 'T'")

    allowedmethods = ("nearest", "between", "minmaxmean")
    if method not in allowedmethods:
        raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods)))

    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    # Work with dates
    # isinstance also accepts subclasses of datetime and timedelta.
    ready_types = (datetime.datetime, datetime.timedelta)

    if not isinstance(date1, ready_types):
        date1 = _dateparse(date1)

    if date2 is None:
        date2 = datetime.datetime.now()
    elif not isinstance(date2, ready_types):
        date2 = _dateparse(date2)

    if not (isinstance(date1, datetime.datetime) or isinstance(date2, datetime.datetime)):
        logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2))
        raise ValueError("date1 and date2 not valid")

    # Use timedelta relative to the other date. date1 is always before date2
    if isinstance(date1, datetime.timedelta):
        date1 = date2-date1
    if isinstance(date2, datetime.timedelta):
        date2 = date1+date2

    if date1 > date2:
        logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2))
        raise ValueError("date1 and date2 not valid")

    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
    # Perform extraction and return
    # Extraction is best effort: a failure is logged and the attribute skipped.

    if isinstance(attr, dict):
        d = dict()
        for k, v in attr.items():
            try:
                d[k] = _extract_attribute(v, method, date1, date2, db)
            except Exception as e:
                _log_extraction_failure(v, e)
        return d

    if isinstance(attr, (list, tuple)):
        d = []
        for v in attr:
            try:
                d.append(_extract_attribute(v, method, date1, date2, db))
            except Exception as e:
                _log_extraction_failure(v, e)
        return d

    try:
        return _extract_attribute(attr, method, date1, date2, db)
    except Exception as e:
        _log_extraction_failure(attr, e)
        return None


def _log_extraction_failure(attribute, error):
    """
    Log a failed extraction: details at debug level, summary at error level.
    """
    logger.debug("Exception in _extract_attribute(): "+str(error))
    # format_tb returns the traceback as text; print_tb would write it to
    # stderr and return None, so the log would only contain "None".
    logger.debug("".join(traceback.format_tb(error.__traceback__)))
    logger.error("Could not extract {}.".format(attribute))
##----------------------------------------------------------------------##
def findattr(pattern, db="H"):
......@@ -349,65 +539,18 @@ def infoattr(attribute, db='H'):
return info
##---------------------------------------------------------------------------##
def ExtrBetweenDates(
attribute,
dateStart,
dateStop=None,
db='H',
):
"""
Query attribute data from an archiver database, get all points between dates.
Use ExtractBetweenDates.
Parameters
----------
attribute : String
Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
dateStart : datetime.datetime, string
Start date for extraction. If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
dateStop : datetime.datetime, string, None
Stop date for extraction.
If string, it will be parsed.
Example of string format %Y-%m-%d-%H:%M:%S or less precise.
If None, it takes the current date and time.
Default is None (now).
db: str
Which database to look in, 'H' or 'T'.
Exceptions
----------
ValueError
The attribute is not found in the database.
Returns
-------
[date, value] : array
date : numpy.ndarray of datetime.datime objects
Dates of the values
value : numpy.ndarray
Archived values
##########################################################################
### Module core functions ###
##########################################################################
def _extract_attribute(attribute, method, date1, date2, db):
"""
Check if exists, check scalar or spectrum and dispatch
"""
if not _check_initialized():
return
if not db in ("H", "T"):
raise AttributeError("Attribute db should be 'H' or 'T'")
# Uncapitalize attribute
attribute = attribute.lower()
# Check attribute is in database
_check_attribute(attribute, db=db)
# Parse dates
dateStart = _dateparse(dateStart)
dateStop = _dateparse(dateStop)
_check_attribute(attribute, db)
# Get info about the attribute
info=infoattr(attribute, db=db)
......@@ -425,93 +568,157 @@ def ExtrBetweenDates(
attribute, info["max_dim_x"]))
attrtype="vector"
# Cut the time horizon in chunks
cdates = _chunkerize(attribute, dateStart, dateStop, db)
# =============
# For now we handle multi dimension the same way as scalar, which will get only the first element
if (attrtype=="scalar") or (attrtype=="multi"):
if info["data_type"] == '1':
# Boolean data type, quick fix
dtype=bool
else:
dtype=float
# Arrays to hold every chunks
value = []
date = []
return _extract_scalar(attribute, method, date1, date2, db, dtype)
if attrtype=="vector":
return _extract_vector(attribute, method, date1, date2, db)
# For each date chunk
for i_d in range(len(cdates)-1):
# =============
# For now we handle multi dimension the same way as scalar, which will get only the first element
if (attrtype=="scalar") or (attrtype=="multi"):
# Inform on retrieval request
logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%(
attribute,
cdates[i_d].strftime(_DBDFMT),
cdates[i_d+1].strftime(_DBDFMT))
)
##---------------------------------------------------------------------------##
def _extract_scalar(attribute, method, date1, date2, db, dtype):
    """
    Extract a scalar attribute from the archive.

    Parameters:
    -----------
    attribute: string
        Name of the attribute, passed as is to the extractor commands.

    method: str
        'nearest': ask the extractor for the value nearest to date1.
        'between': ask for all values between date1 and date2, chunk by chunk.
        Any other method is not implemented for scalars.

    date1, date2: datetime.datetime
        Dates of the extraction. date2 is only used by 'between'.

    db: str
        Which database to look in, 'H' or 'T'.

    dtype: type
        Type of the values. bool goes through _cast_bool. Any other type is
        handed to numpy.asarray in 'between' mode.

    Exceptions:
    -----------
    NotImplementedError
        The method is neither 'nearest' nor 'between'.

    Return:
    -------
    pandas.Series named after the attribute, or None if the extractor
    command did not return usable data.
    """
    # 'H' is the first extractor, 'T' the second
    extractor = _extractors[{'H':0, 'T':1}[db]]

    # =====================
    if method == "nearest":
        cmdreturn = _cmd_with_retry(extractor, "GetNearestValue", [
                                                attribute,
                                                date1.strftime(_DBDFMT),
                                                ])

        # Unpack return
        # A failed command gives None (AttributeError on split); a reply
        # without exactly one ';' cannot be unpacked (ValueError).
        try:
            _date, _value = cmdreturn.split(';')
        except (AttributeError, TypeError, ValueError):
            logger.error("Could not extract this chunk. Check the device extractor")
            return None

        # Transform by datatype
        # NOTE(review): for a non-bool dtype the value stays the text sent
        # by the extractor; confirm whether it should be cast with dtype.
        if dtype is bool:
            _value = _cast_bool(_value)

        # Fabricate return pandas.Series
        # The timestamp is divided by 1000 before conversion (presumably milliseconds)
        return pd.Series(
            index=[datetime.datetime.fromtimestamp(int(_date)/1000),],
            data=[_value,],
            name=attribute,
            )

    # =====================
    if method == "between":
        # Cut the time horizon in chunks
        cdates = _chunkerize(attribute, date1, date2, db)

        # Array to hold data
        data = []

        # For each date chunk, from one boundary to the next
        for chunk_start, chunk_stop in zip(cdates[:-1], cdates[1:]):
            cmdreturn = _cmd_with_retry(extractor, "ExtractBetweenDates", [
                                                    attribute,
                                                    chunk_start.strftime(_DBDFMT),
                                                    chunk_stop.strftime(_DBDFMT)
                                                    ])

            # Unpack return
            try:
                _date, _value = cmdreturn
            except TypeError:
                logger.error("Could not extract this chunk. Check the device extractor")
                return None

            # Transform to datetime - value arrays
            # NOTE: it is faster than using pandas.to_datetime()
            if dtype is bool:
                _value = _cast_bool(_value)
            else:
                _value = np.asarray(_value, dtype=dtype)
            if len(_date) > 0:
                _date = _ArrayTimeStampToDatetime(_date/1000.0)

            # Fabricate return pandas.Series
            data.append(pd.Series(index=_date, data=_value, name=attribute))

        # Concatenate chunks
        return pd.concat(data)

    # ========================
    # If we are here, the method is not implemented ("minmaxmean" included)
    logger.error("Method {} is not implemented for scalars.".format(method))
    # NotImplemented is a comparison constant, not an exception; raising it
    # would fail with a TypeError.
    raise NotImplementedError("Method {} is not implemented for scalars.".format(method))
# Transform to datetime - value arrays
_value = np.empty((N, int(info["max_dim_x"])), dtype=float)
_value[:] = np.nan
_date = np.empty(N, dtype=object)
for i_h in range(N):
_value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
_date[i_h]=attrHist[i_h].time.todatetime()
##---------------------------------------------------------------------------##
def _extract_vector(attribute, method, date1, date2, db):
    """
    Extract a vector (spectrum) attribute from the archive.

    Parameters:
    -----------
    attribute: string
        Name of the attribute, passed as is to the extractor commands.

    method: str
        'nearest': retrieve the entry nearest to date1.
        Any other method is not implemented for vectors.

    date1, date2: datetime.datetime
        Dates of the extraction. date2 is not used by 'nearest'.

    db: str
        Which database to look in, 'H' or 'T'.

    Exceptions:
    -----------
    NotImplementedError
        The method is not 'nearest'.

    Return:
    -------
    pandas.Series with a single entry, named after the attribute, or None
    if the extractor command did not return usable data.
    """
    # 'H' is the first extractor, 'T' the second
    extractor = _extractors[{'H':0, 'T':1}[db]]

    # Get info about the attribute
    info = infoattr(attribute, db=db)

    # =====================
    if method == "nearest":
        # Get nearest does not work with vector.
        # Make a between date with surrounding dates.

        # Dynamically find a surrounding interval holding at least one point.
        # NOTE(review): the loop has no upper bound on the interval; confirm
        # what should happen when the attribute has no archived point at all.
        cnt = 0
        dt = datetime.timedelta(seconds=10)
        while cnt < 1:
            logger.debug("Seeking points in {} to {}".format(date1-dt,date1+dt))
            cnt = extractor.GetAttDataBetweenDatesCount([
                    attribute,
                    (date1-dt).strftime(_DBDFMT2),
                    (date1+dt).strftime(_DBDFMT2)
                    ])
            dt = dt*1.5
        # dt was widened once more after the successful count
        logger.debug("Found {} points in a +- {} interval".format(cnt,str(dt/1.5)))

        # For vector, we have to use the GetAttxxx commands
        cmdreturn = _cmd_with_retry(extractor, "GetAttDataBetweenDates", [
                                                attribute,
                                                (date1-dt).strftime(_DBDFMT),
                                                (date1+dt).strftime(_DBDFMT),
                                                ])

        # Unpack return
        try:
            [N,], [name,] = cmdreturn
            N = int(N)
        except TypeError:
            logger.error("Could not extract this attribute. Check the device extractor")
            return None

        # Read the history
        # NOTE(review): the dynamic attribute is not removed from the
        # extractor after reading; confirm whether cleanup is needed.
        logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
        attrHist = extractor.attribute_history(name, N)

        # Transform to datetime - value arrays
        # Rows shorter than max_dim_x are padded with NaN
        _value = np.empty((N, int(info["max_dim_x"])), dtype=float)
        _value[:] = np.nan
        _date = np.empty(N, dtype=object)
        for i_h in range(N):
            _value[i_h,:attrHist[i_h].dim_x] = attrHist[i_h].value
            _date[i_h] = attrHist[i_h].time.todatetime()

        # Seeking nearest entry
        idx = np.argmin(abs(_date-date1))
        logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx]))

        # Fabricate return pandas.Series
        return pd.Series(index=[_date[idx],], data=[_value[idx],], name=attribute)

    # If we are here, the method is not implemented
    logger.error("Method {} is not implemented for vectors.".format(method))
    # NotImplemented is a comparison constant, not an exception; raising it
    # would fail with a TypeError.
    raise NotImplementedError("Method {} is not implemented for vectors.".format(method))
##---------------------------------------------------------------------------##
......@@ -644,3 +851,7 @@ def ExtrBetweenDates_MinMaxMean(
"Max":value_max,
},)
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Initialize on import
## init() is called without arguments as soon as the module is imported,
## so no manual initialization is needed before using the module.
## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
init()
......@@ -13,12 +13,9 @@ Usage example, with an ipython prompt
```python
In [1]: import pySoleilControl.ArchiveExtractor as AE
In [2]: # For now, we need manual initialization of the module
...: AE.init()
In [3]: # Looking for an attribute in HDB
In [2]: # Looking for an attribute in HDB
...: AE.findattr("ans/dg/*dcct*")
Out[3]:
Out[2]:
['ANS/DG/DCCT-CTRL/State',
'ANS/DG/DCCT-CTRL/Status',
'ANS/DG/DCCT-CTRL/current',
......@@ -26,10 +23,10 @@ Out[3]:
'ANS/DG/DCCT-CTRL/lifeTime',
'ANS/DG/DCCT-CTRL/lifeTimeErr']
In [4]: # Get data between two dates, this return a pandas.Dataframe object
In [3]: # Get data between two dates, this returns a pandas.Series object
...: AE.ExtrBetweenDates('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00')
INFO:Perform ExtractBetweenDates (ans/dg/dcct-ctrl/current, 2021-12-13 00:00:00, 2021-12-13 12:00:00)
Out[4]:
Out[3]:
2021-12-13 00:00:00 450.993568
2021-12-13 00:00:01 450.981979
2021-12-13 00:00:02 450.971455
......@@ -43,11 +40,11 @@ Out[4]:
2021-12-13 12:00:00 15.005410
Length: 42725, dtype: float64
In [5]: # Get min, max and mean with a 10 minute window
In [4]: # Get min, max and mean with a 10 minute window
...: d=AE.ExtrBetweenDates_MinMaxMean('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00', timeInterval='10m')
In [6]: d
Out[6]:
In [5]: d
Out[5]:
Min Mean Max
2021-12-13 00:05:00 449.762286 450.619654 451.617095
2021-12-13 00:15:00 449.761171 450.676306 451.595391
......@@ -64,14 +61,14 @@ Out[6]:
[72 rows x 3 columns]
In [7]: # Activate inline matplotlib
In [6]: # Activate inline matplotlib
...: %matplotlib
Using matplotlib backend: TkAgg
In [7]: # Simply plot
In [6]: # Simply plot
...: d.plot()
In [8]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?'
In [7]: # The ipython prompt supports autocompletion. The documentation of a function can be read quickly by appending a '?'
...: AE.ExtrBetweenDates?
Signature: AE.ExtrBetweenDates(attribute, dateStart, dateStop=None, db='H')
Docstring:
......@@ -114,4 +111,4 @@ Type: function
```
\ No newline at end of file
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment