Merge fixes and feat

* Attribute type casting improved * Unique entry function "extract" * Multiexport * Timedelta

Merge fixes and feat
e7d2b3ae · BRONES Romain · dbdea2bf · d0fd0633 · e7d2b3ae · e7d2b3ae
Commit e7d2b3ae authored 1 year ago by BRONES Romain
--- a/ArchiveExtractor.py
+++ b/ArchiveExtractor.py
 """
-Python module for extracting attribute from Arhive Extractor Device.
+Python module for extracting attribute from Archive Extractor Device.
 """
 import logging
 import datetime
 import numpy as np
 import PyTango as tango
 import pandas as pd
+import traceback
 __version__ = "1.0.1"
@@ -38,8 +39,16 @@ _DBDFMT2 = "%d-%m-%Y %H:%M:%S"
 ##########################################################################
 # Vectorized fromtimestamp function
+# NOTE: it is faster than using pandas.to_datetime()
 _ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)
+# Vectorized bool map dictionary
+_ArrayStr2Bool = np.vectorize({
+    "true":True, 't':True,
+    "false":False, 'f':False,
+    }.get)
 def _check_initialized():
    """
    Check if the module is initialized.
@@ -57,14 +66,16 @@ def _check_initialized():
 ##----------------------------------------------------------------------##
 def _dateparse(datestr):
    """
-    Convenient function to parse date strings.
+    Convenient function to parse date or duration strings.
-    Global format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
+    Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
+    Duration format is 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
    If datstr is None, take the actual date and time.
    Parameters
    ---------
    datestr : string
        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.
+        Duration as a string, format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
    Exceptions
    ----------
@@ -73,12 +84,29 @@ def _dateparse(datestr):
    Returns
    -------
-    date : datetime.datetime
+    date : datetime.datetime or datetime.timedelta
-        Parsed date
+        Parsed date or duration
    """
+    logger.debug("Parsing date string '%s'"%datestr)
+    # None means "now": the docstring promises it, and indexing None below would raise TypeError
+    if datestr is None:
+        return datetime.datetime.now()
+    # Determine date/duration by looking at the last char
+    if datestr[-1] in "mhdM":
+        # Duration
+        logger.debug("Assuming a duration")
+        try:
+            q=float(datestr[:-1])
+        except ValueError as e:
+            logger.error("Failed to parse date string. Given the last character, a duration was assumed.")
+            raise Exception("Could not parse argument to a date") from e
-    if datestr is None:
+        # Convert all in minutes
-        return datetime.datetime.now()
+        # 'm' is minutes and 'M' is months (counted as 30 days); the keys must differ in case,
+        # otherwise the second entry silently overrides the first and 'M' is missing.
+        minutes = q*{'m':1, 'h':60, 'd':60*24, 'M':30*60*24}[datestr[-1]]
+        return datetime.timedelta(minutes=minutes)
+    else:
+        # Probably a date string
        # This gives all format that will be tried, in order.
        # Stop on first parse success. Raise error if none succeed.
@@ -204,6 +232,7 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
        Whatever the command returns.
        None if failed after the amount of retries.
    """
+    logger.info("Perform Command {} {}".format(cmd, arg))
    for i in range(retry):
        # Make retrieval request
@@ -221,6 +250,48 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
        break
    return cmdreturn
+def _cast_bool(value):
+    """
+    Convert a value, or an array of values, to boolean.
+    A direct numpy cast is attempted first. When numpy refuses it (ValueError),
+    the input is handled as text: lowercased, stripped, then mapped through _ArrayStr2Bool.
+    Parameters:
+    -----------
+    value: string, integer, or array of such
+        Value(s) to convert.
+    Return:
+    -------
+    boolean:
+        A single value when the input is 0-dimensional, otherwise an array.
+    """
+    arr = np.asarray(value)
+    def _unwrap(a):
+        # 0-d arrays are handed back as a plain python value
+        return a.item() if a.shape == () else a
+    try:
+        direct = arr.astype("bool")
+    except ValueError:
+        # Not directly castable, fall through to the text mapping
+        pass
+    else:
+        logger.debug("Direct conversion to boolean")
+        return _unwrap(direct)
+    logger.debug("Try to convert to boolean")
+    # NOTE(review): text that is not a key of the _ArrayStr2Bool mapping yields None from dict.get - confirm this is acceptable
+    lowered = np.char.lower(arr)
+    mapped = _ArrayStr2Bool(np.char.strip(lowered))
+    return _unwrap(mapped)
 ##########################################################################
 ###                  Module private variables                          ###
 ##########################################################################
@@ -243,6 +314,8 @@ def init(
    Initialize the module.
    Instanciate tango.DeviceProxy for extractors (TDB and HDB)
+    Parameters:
+    -----------
    HdbExtractorPath, TdbExtractorPath: string
        Tango path to the extractors.
@@ -274,6 +347,123 @@ def init(
    _AttrTables = tuple(e.getattnameall() for e in _extractors)
    logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))
+##########################################################################
+###                    Module access functions                         ###
+##########################################################################
+def extract(
+        attr,
+        date1, date2=None,
+        method="nearest",
+        db='H',
+        ):
+    """
+    Access function to perform extraction between date1 and date2.
+    Can extract one or several attributes.
+    date1 and date2 can both be exact dates, or one of the two can be a duration that is taken relative to the other.
+    Parameters:
+    -----------
+    attr: string, list, tuple, dict
+        Attribute(s) to extract.
+        If string, extract the given attribute and return the extraction result.
+        If list or tuple, extract each attribute and return a list of results.
+        If dict, extract each value and return a dict of results with the same keys.
+        Attributes whose extraction raised are logged and left out of the list/dict.
+    date1, date2: string, datetime.datetime, datetime.timedelta
+        Exact date, or duration relative to the other date.
+        If string, it will be parsed.
+        A date can be given with string format '%Y-%m-%d-%H:%M:%S' or less precise (i.e. '2021-02', '2022-11-03', '2022-05-10-21:00', ...).
+        A duration can be given with string format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
+        A datetime.datetime object or datetime.timedelta object will be used as is.
+        date2 can be None. In that case it is replaced by the current time.
+    method: str
+        Method of extraction
+            'nearest': Retrieve nearest value of date1, date2 is ignored.
+            'between': Retrieve data between date1 and date2.
+            'minmaxmean': Accepted by the validation done here.
+    db: str
+        Which database to look in, 'H' or 'T'.
+    Exceptions:
+    -----------
+    ValueError
+        db or method has an unknown value, neither date is an exact date, or date1 is after date2.
+    Return:
+    -------
+    Result of the extraction (see attr), None for a single attribute whose extraction raised,
+    or None when the module is not initialized.
+    """
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #    Perform a few sanity checks
+    if not _check_initialized():
+        # Stop here, the function has produced a message if necessary
+        return
+    if db not in ("H", "T"):
+        raise ValueError("Attribute 'db' should be 'H' or 'T'")
+    allowedmethods=("nearest", "between", "minmaxmean")
+    if method not in allowedmethods:
+        raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods)))
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #     Work with dates
+    # isinstance (not type equality) so that datetime subclasses are used as is instead of being sent to the string parser
+    knowntypes = (datetime.datetime, datetime.timedelta)
+    if not isinstance(date1, knowntypes):
+        date1 = _dateparse(date1)
+    if date2 is None:
+        date2 = datetime.datetime.now()
+    elif not isinstance(date2, knowntypes):
+        date2 = _dateparse(date2)
+    if not (isinstance(date1, datetime.datetime) or isinstance(date2, datetime.datetime)):
+        logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2))
+        raise ValueError("date1 and date2 not valid")
+    # Use timedelta relative to the other date. date1 is always before date2
+    if isinstance(date1, datetime.timedelta):
+        date1 = date2-date1
+    elif isinstance(date2, datetime.timedelta):
+        date2 = date1+date2
+    if  date1 > date2:
+        logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2))
+        raise ValueError("date1 and date2 not valid")
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #      Perform extraction and return
+    # Private marker for a failed extraction; None cannot be used because
+    # _extract_attribute may itself return None
+    failed = object()
+    def _try_extract(name):
+        # Extract one attribute; log and return the marker instead of raising
+        try:
+            return _extract_attribute(name, method, date1, date2, db)
+        except Exception as e:
+            logger.debug("Exception in _extract_attribute(): "+str(e))
+            # format_tb returns the text; print_tb writes to stderr and returns None
+            logger.debug("".join(traceback.format_tb(e.__traceback__)))
+            logger.error("Could not extract {}.".format(name))
+            return failed
+    if isinstance(attr, dict):
+        d=dict()
+        for k,v in attr.items():
+            r=_try_extract(v)
+            if r is not failed:
+                d[k]=r
+        return d
+    if isinstance(attr, (list,tuple)):
+        d=[]
+        for v in attr:
+            r=_try_extract(v)
+            if r is not failed:
+                d.append(r)
+        return d
+    r=_try_extract(attr)
+    if r is failed:
+        return None
+    return r
 ##----------------------------------------------------------------------##
 def findattr(pattern, db="H"):
@@ -349,65 +539,18 @@ def infoattr(attribute, db='H'):
    return info
-##---------------------------------------------------------------------------##
+##########################################################################
-def ExtrBetweenDates(
+###                    Module core functions                           ###
-        attribute,
+##########################################################################
-        dateStart,
-        dateStop=None,
-        db='H',
-        ):
-    """
-    Query attribute data from an archiver database, get all points between dates.
-    Use ExtractBetweenDates.
-    Parameters
-    ----------
-    attribute : String
-        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
-    dateStart : datetime.datetime, string
-        Start date for extraction. If string, it will be parsed.
-        Example of string format %Y-%m-%d-%H:%M:%S or less precise.
-    dateStop : datetime.datetime, string, None
-        Stop date for extraction.
-        If string, it will be parsed.
-        Example of string format %Y-%m-%d-%H:%M:%S or less precise.
-        If None, it takes the current date and time.
-        Default is None (now).
-    db: str
-        Which database to look in, 'H' or 'T'.
-    Exceptions
-    ----------
-    ValueError
-        The attribute is not found in the database.
-    Returns
-    -------
-    [date, value] : array
-        date : numpy.ndarray of datetime.datime objects
-            Dates of the values
-        value : numpy.ndarray
-            Archived values
+def _extract_attribute(attribute, method, date1, date2, db):
+    """
+    Check if exists, check scalar or spectrum and dispatch
    """
-    if not _check_initialized():
-        return
-    if not db in ("H", "T"):
-        raise AttributeError("Attribute db should be 'H' or 'T'")
    # Uncapitalize attribute
    attribute = attribute.lower()
+    _check_attribute(attribute, db)
-    # Check attribute is in database
-    _check_attribute(attribute, db=db)
-    # Parse dates
-    dateStart = _dateparse(dateStart)
-    dateStop = _dateparse(dateStop)
    # Get info about the attribute
    info=infoattr(attribute, db=db)
@@ -425,63 +568,132 @@ def ExtrBetweenDates(
                attribute, info["max_dim_x"]))
            attrtype="vector"
-    # Cut the time horizon in chunks
-    cdates = _chunkerize(attribute, dateStart, dateStop, db)
-    # Arrays to hold every chunks
-    value = []
-    date = []
-    # For each date chunk
-    for i_d in range(len(cdates)-1):
    # =============
    # For now we handle multi dimension the same way as scalar, which will get only the first element
    if (attrtype=="scalar") or (attrtype=="multi"):
-            # Inform on retrieval request
+        if info["data_type"] == '1':
-            logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%(
+            # Boolean data type, quick fix
+            dtype=bool
+        else:
+            dtype=float
+        return _extract_scalar(attribute, method, date1, date2, db, dtype)
+    if attrtype=="vector":
+        return _extract_vector(attribute, method, date1, date2, db)
+##---------------------------------------------------------------------------##
+def _extract_scalar(attribute, method, date1, date2, db, dtype):
+    """
+    Extract a scalar attribute with the given method.
+    Parameters:
+    -----------
+    attribute: string
+        Name of the attribute.
+    method: str
+        'nearest': ask the extractor for the value nearest to date1.
+        'between': ask the extractor for all values between date1 and date2, chunk by chunk.
+        'minmaxmean': not implemented here.
+    date1, date2: datetime.datetime
+        Dates of the extraction.
+    db: str
+        Which database to look in, 'H' or 'T'.
+    dtype: type
+        When bool, values go through _cast_bool. Otherwise it is used as numpy dtype for 'between'.
+    Exceptions:
+    -----------
+    NotImplementedError
+        method is 'minmaxmean'.
+    Return:
+    -------
+    pandas.Series named after the attribute, or None if the extractor command gave no usable result.
+    """
+    # =====================
+    if method == "nearest":
+        cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetNearestValue", [
                                                attribute,
-                cdates[i_d].strftime(_DBDFMT),
+                                                date1.strftime(_DBDFMT),
-                cdates[i_d+1].strftime(_DBDFMT))
+                                                ])
-                )
+        # Unpack return
+        try:
+            _date, _value = cmdreturn.split(';')
+        except (AttributeError, TypeError, ValueError):
+            # AttributeError: the command failed and returned None (no split method)
+            # ValueError: the reply does not hold exactly two ';' separated fields
+            logger.error("Could not extract this chunk. Check the device extractor")
+            return None
+        # Transform by datatype
+        # NOTE(review): for non bool dtype the value is kept as returned by split() - confirm whether a cast is wanted
+        if dtype is bool:
+            _value = _cast_bool(_value)
+        # Fabricate return pandas.Series
+        d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[_value,], name=attribute)
+        return d
+    # =====================
+    if method == "between":
+        # Cut the time horizon in chunks
+        cdates = _chunkerize(attribute, date1, date2, db)
+        # Array to hold data
+        data = []
+        # For each date chunk
+        for i_d in range(len(cdates)-1):
            cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [
                                                    attribute,
                                                    cdates[i_d].strftime(_DBDFMT),
                                                    cdates[i_d+1].strftime(_DBDFMT)
                                                    ])
-            # Check command return
-            if cmdreturn is None:
-                logger.error("Could not extract this chunk. Check the device extractor")
-                return None
            # Unpack return
+            try:
                _date, _value = cmdreturn
+            except TypeError:
+                logger.error("Could not extract this chunk. Check the device extractor")
+                return None
            # Transform to datetime - value arrays
-            # NOTE: it is faster than using pandas.to_datetime()
+            if dtype is bool:
-            _value = np.asarray(_value, dtype=float)
+                _value = _cast_bool(_value)
+            else:
+                _value = np.asarray(_value, dtype=dtype)
            if len(_date) > 0:
                _date = _ArrayTimeStampToDatetime(_date/1000.0)
-            value.append(_value)
+            # Fabricate return pandas.Series
-            date.append(_date)
+            data.append(pd.Series(index=_date, data=_value, name=attribute))
-        # =============
+        # Concatenate chunks
-        if attrtype=="vector":
+        return pd.concat(data)
-            logger.info("Perform GetAttDataBetweenDates (%s, %s, %s)"%(
+    # ========================
+    if method == "minmaxmean":
+        # If we are here, the method is not implemented
+        logger.error("Method {} is not implemented for scalars.".format(method))
+        # NotImplemented is a comparison constant, not an exception; raising it gives a TypeError
+        raise NotImplementedError
+##---------------------------------------------------------------------------##
+def _extract_vector(attribute, method, date1, date2, db):
+    # Get info about the attribute
+    info=infoattr(attribute, db=db)
+    # =====================
+    if method == "nearest":
+        # Get nearest does not work with vector.
+        # Make a between date with surounding dates.
+        # Dynamically find surounding
+        cnt=0
+        dt=datetime.timedelta(seconds=10)
+        while cnt<1:
+            logger.debug("Seeking points in {} to {}".format(date1-dt,date1+dt))
+            cnt=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
                    attribute,
-                                                    cdates[i_d].strftime(_DBDFMT),
+                    (date1-dt).strftime(_DBDFMT2),
-                                                    cdates[i_d+1].strftime(_DBDFMT)
+                    (date1+dt).strftime(_DBDFMT2)
-                                                    ))
+                    ])
+            dt=dt*1.5
+        logger.debug("Found {} points in a +- {} interval".format(cnt,str(dt/1.5)))
-            [N,], [name,] = _extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDates([
+        # For vector, we have to use the GetAttxxx commands
+        cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [
                                                attribute,
-                cdates[i_d].strftime(_DBDFMT),
+                                                (date1-dt).strftime(_DBDFMT),
-                cdates[i_d+1].strftime(_DBDFMT)
+                                                (date1+dt).strftime(_DBDFMT),
                                                ])
+        # Unpack return
+        try:
+            [N,], [name,] = cmdreturn
            N=int(N)
+        except TypeError:
+            logger.error("Could not extract this attribute. Check the device extractor")
+            return None
        # Read the history
        logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
@@ -495,23 +707,18 @@ def ExtrBetweenDates(
            _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
            _date[i_h]=attrHist[i_h].time.todatetime()
-            # Remove dynamic attribute
+        # Seeking nearest entry
-            logger.debug("Remove dynamic attribute %s."%name)
+        idx=np.argmin(abs(_date-date1))
-            _extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name)
+        logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx]))
-            value.append(_value)
+        # Fabricate return pandas.Series
-            date.append(_date)
+        d=pd.Series(index=[_date[idx],], data=[_value[idx],], name=attribute)
-    logger.debug("Concatenate chunks")
+        return d
-    value = np.concatenate(value)
-    date = np.concatenate(date)
-    logger.debug("Extraction done for %s."%attribute)
+    # If we are here, the method is not implemented
-    if attrtype=="vector":
+    logger.error("Method {} is not implemented for vectors.".format(method))
-        return pd.DataFrame(index=date, data=value).dropna(axis=1, how='all')
+    # NotImplemented is a comparison constant, not an exception; raising it gives a TypeError
+    raise NotImplementedError
-    else:
-        return pd.Series(index=date, data=value)
 ##---------------------------------------------------------------------------##
@@ -644,3 +851,7 @@ def ExtrBetweenDates_MinMaxMean(
                "Max":value_max,
                },)
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+## Initialize on import
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+init()
--- a/README.md
+++ b/README.md
@@ -13,12 +13,9 @@ Usage example, with an ipython prompt
 ```python
 In [1]: import pySoleilControl.ArchiveExtractor as AE
-In [2]: # For now, we need manual initialization of the module
+In [2]: # Looking for an attribute in HDB
-   ...: AE.init()
-In [3]: # Looking for an attribute in HDB
   ...: AE.findattr("ans/dg/*dcct*")
-Out[3]: 
+Out[2]: 
 ['ANS/DG/DCCT-CTRL/State',
 'ANS/DG/DCCT-CTRL/Status',
 'ANS/DG/DCCT-CTRL/current',
@@ -26,10 +23,10 @@ Out[3]:
 'ANS/DG/DCCT-CTRL/lifeTime',
 'ANS/DG/DCCT-CTRL/lifeTimeErr']
-In [4]: # Get data between two dates, this return a pandas.Dataframe object
+In [3]: # Get data between two dates, this return a pandas.Dataframe object
   ...: AE.ExtrBetweenDates('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00')
 INFO:Perform ExtractBetweenDates (ans/dg/dcct-ctrl/current, 2021-12-13 00:00:00, 2021-12-13 12:00:00)
-Out[4]: 
+Out[3]: 
 2021-12-13 00:00:00    450.993568
 2021-12-13 00:00:01    450.981979
 2021-12-13 00:00:02    450.971455
@@ -43,11 +40,11 @@ Out[4]:
 2021-12-13 12:00:00     15.005410
 Length: 42725, dtype: float64
-In [5]: # Get min, max and mean with a 10 minute window
+In [4]: # Get min, max and mean with a 10 minute window
   ...: d=AE.ExtrBetweenDates_MinMaxMean('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00', timeInterval='10m')
-In [6]: d
+In [5]: d
-Out[6]: 
+Out[5]: 
                            Min        Mean         Max
 2021-12-13 00:05:00  449.762286  450.619654  451.617095
 2021-12-13 00:15:00  449.761171  450.676306  451.595391
@@ -64,14 +61,14 @@ Out[6]:
 [72 rows x 3 columns]
-In [7]: # Activate inline matplotlib
+In [6]: # Activate inline matplotlib
   ...: %matplotlib
 Using matplotlib backend: TkAgg
-In [7]: # Simply plot
+In [6]: # Simply plot
   ...: d.plot()
-In [8]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?'
+In [7]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?'
    ...: AE.ExtrBetweenDates?
 Signature: AE.ExtrBetweenDates(attribute, dateStart, dateStop=None, db='H')
 Docstring: