Merge fixes and feat

* Attribute type casting improved * Unique entry function "extract" * Multiexport * Timedelta

Merge fixes and feat
e7d2b3ae · BRONES Romain · dbdea2bf · d0fd0633 · e7d2b3ae · e7d2b3ae
Commit e7d2b3ae authored 1 year ago by BRONES Romain
--- a/ArchiveExtractor.py
+++ b/ArchiveExtractor.py
 """
-Python module for extracting attribute from Arhive Extractor Device.
+Python module for extracting attribute from Archive Extractor Device.
 """
 import logging
 import datetime
 import numpy as np
 import PyTango as tango
 import pandas as pd
+import traceback

 __version__ = "1.0.1"

@@ -38,8 +39,16 @@ _DBDFMT2 = "%d-%m-%Y %H:%M:%S"
 ##########################################################################

 # Vectorized fromtimestamp function
+# NOTE: it is faster than using pandas.to_datetime()
 _ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)

+# Vectorized bool map dictionary
+_ArrayStr2Bool = np.vectorize({
+    "true":True, 't':True,
+    "false":False, 'f':False,
+    }.get)
+
+
 def _check_initialized():
    """
    Check if the module is initialized.
@@ -57,14 +66,16 @@ def _check_initialized():
 ##----------------------------------------------------------------------##
 def _dateparse(datestr):
    """
-    Convenient function to parse date strings.
-    Global format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
+    Convenient function to parse date or duration strings.
+    Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
+    Duration format is 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months of 30 days)
    If datstr is None, take the actual date and time.

    Parameters
    ---------
    datestr : string
        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.
+        Duration as a string, format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months of 30 days)

    Exceptions
    ----------
@@ -73,35 +84,52 @@ def _dateparse(datestr):

    Returns
    -------
-    date : datetime.datetime
-        Parsed date
+    date : datetime.datetime or datetime.timedelta
+        Parsed date or duration
    """
+    logger.debug("Parsing date string '%s'"%datestr)
+
+    # Determine date/duration by looking at the last char
+    if datestr[-1] in "mhdM":
+        # Duration
+        logger.debug("Assuming a duration")

-    if datestr is None:
-        return datetime.datetime.now()
-
-    # This gives all format that will be tried, in order.
-    # Stop on first parse success. Raise error if none succeed.
-    fmt = [
-        "%Y-%m-%d-%H:%M:%S",
-        "%Y-%m-%d-%H:%M",
-        "%Y-%m-%d-%H",
-        "%Y-%m-%d",
-        "%Y-%m",
-        ]
-
-    date = None
-    for f in fmt:
        try:
-            date = datetime.datetime.strptime(datestr, f)
-        except ValueError:
-            continue
-        else:
-            break
+            q=float(datestr[:-1])
+        except ValueError as e:
+            logger.error("Failed to parse date string. Given the last character, a duration was assumed.")
+            raise ValueError("Could not parse argument to a date") from e
+
+        # Convert all in minutes; 'm' is minutes and 'M' is months (30 days), the two keys must stay distinct
+        minutes = q*{'m':1, 'h':60, 'd':60*24, 'M':30*60*24}[datestr[-1]]
+
+        return datetime.timedelta(minutes=minutes)
+
    else:
-        raise ValueError("Could not parse argument to a date")
+        # Probably a date string
+
+        # This gives all format that will be tried, in order.
+        # Stop on first parse success. Raise error if none succeed.
+        fmt = [
+            "%Y-%m-%d-%H:%M:%S",
+            "%Y-%m-%d-%H:%M",
+            "%Y-%m-%d-%H",
+            "%Y-%m-%d",
+            "%Y-%m",
+            ]
+
+        date = None
+        for f in fmt:
+            try:
+                date = datetime.datetime.strptime(datestr, f)
+            except ValueError:
+                continue
+            else:
+                break
+        else:
+            raise ValueError("Could not parse argument to a date")

-    return date
+        return date

 ##----------------------------------------------------------------------##
 def _check_attribute(attribute, db):
@@ -204,6 +232,7 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
        Whatever the command returns.
        None if failed after the amount of retries.
    """
+    logger.info("Perform Command {} {}".format(cmd, arg))

    for i in range(retry):
        # Make retrieval request
@@ -221,6 +250,48 @@ def _cmd_with_retry(dp, cmd, arg, retry=2):
        break
    return cmdreturn

+
+def _cast_bool(value):
+    """
+    Cast a value, or array of values, to boolean.
+    A direct NumPy cast is tried first; if it raises ValueError, the input is lower-cased, stripped and mapped through _ArrayStr2Bool.
+
+    Parameters:
+    -----------
+    value: string, integer, or array of such
+        value to convert.
+
+    Return:
+    boolean:
+        A single value when the input is 0-dimensional, otherwise an array.
+    """
+
+    # Force to array, so that single values and sequences follow the same path
+    value = np.asarray(value)
+
+    # Cast a 0-dimensional array back to a single Python value
+    def castback(v):
+        if v.shape == ():
+            return v.item()
+        return v
+
+    # Simply try to cast to bool first
+    try:
+        value = value.astype("bool")
+        logger.debug("Direct conversion to boolean")
+        return castback(value)
+    except ValueError:
+        # NOTE(review): this fallback relies on astype raising for text such as "true"; confirm with the NumPy version in use
+        pass
+
+    logger.debug("Try to convert to boolean")
+
+    value = np.char.strip(np.char.lower(value))
+    value = _ArrayStr2Bool(value)
+
+    return castback(value)
+
+
 ##########################################################################
 ###                  Module private variables                          ###
 ##########################################################################
@@ -243,11 +314,13 @@ def init(
    Initialize the module.
    Instanciate tango.DeviceProxy for extractors (TDB and HDB)

-        HdbExtractorPath, TdbExtractorPath: string
-            Tango path to the extractors.
+    Parameters:
+    -----------
+    HdbExtractorPath, TdbExtractorPath: string
+        Tango path to the extractors.

-        loglevel: string
-            loglevel to pass to logging.Logger
+    loglevel: string
+        loglevel to pass to logging.Logger
    """
    global _extractors
    global _AttrTables
@@ -274,6 +347,123 @@ def init(
    _AttrTables = tuple(e.getattnameall() for e in _extractors)
    logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))

+##########################################################################
+###                    Module access functions                         ###
+##########################################################################
+
+def extract(
+        attr,
+        date1, date2=None,
+        method="nearest",
+        db='H',
+        ):
+    """
+    Access function to perform extraction between date1 and date2.
+    Can extract one or several attributes.
+    date1 and date2 can be both exact date, or one of two can be a time interval that will be taken relative to the other.
+    Attributes that fail to extract are logged and left out of the result (None is returned for a single attribute).
+
+    Parameters:
+    -----------
+    attr: string, list, dict
+        Attribute(s) to extract.
+        If string, extract the given attribute, returning a pandas.Series.
+        If list, extract attributes and return a list of pandas.Series.
+        If a dict, extract attributes and return a dict of pandas.Series with same keys.
+
+    date1, date2: string, datetime.datetime, datetime.timedelta, None
+        Exact date, or duration relative to the other date.
+        If string, it will be parsed.
+        A start date can be given with string format '%Y-%m-%d-%H:%M:%S' or less precise (i.e. '2021-02', '2022-11-03', '2022-05-10-21:00', ...).
+        A duration can be given with string format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
+        A datetime.datetime object or datetime.timedelta object will be used as is.
+        date2 can be None. In that case it is replaced by the current time.
+
+    method: str
+        Method of extraction
+            'nearest': Retrieve nearest value of date1, date2 is ignored.
+            'between': Retrieve data between date1 and date2.
+
+    db: str
+        Which database to look in, 'H' or 'T'.
+
+    """
+
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #    Perform a few sanity checks
+    if not _check_initialized():
+        # Stop here, the function has produced a message if necessary
+        return
+
+    if db not in ("H", "T"):
+        raise ValueError("Attribute 'db' should be 'H' or 'T'")
+
+
+    allowedmethods=("nearest", "between", "minmaxmean")
+    if method not in allowedmethods:
+        raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods)))
+
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #     Work with dates (isinstance also accepts subclasses such as pandas.Timestamp, which must not be re-parsed)
+    if not isinstance(date1, (datetime.datetime, datetime.timedelta)):
+        date1 = _dateparse(date1)
+    if date2 is None:
+        date2 = datetime.datetime.now()
+    else:
+        if not isinstance(date2, (datetime.datetime, datetime.timedelta)):
+            date2 = _dateparse(date2)
+
+    if not (isinstance(date1, datetime.datetime) or isinstance(date2, datetime.datetime)):
+        logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2))
+        raise ValueError("date1 and date2 not valid")
+
+    # Use timedelta relative to the other date. date1 is always before date2
+    if isinstance(date1, datetime.timedelta):
+        date1 = date2-date1
+    if isinstance(date2, datetime.timedelta):
+        date2 = date1+date2
+
+    if  date1 > date2:
+        logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2))
+        raise ValueError("date1 and date2 not valid")
+
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #      Perform extraction and return
+
+    if isinstance(attr, dict):
+        d=dict()
+        for k,v in attr.items():
+            try:
+                d[k] = _extract_attribute(v, method, date1, date2, db)
+            except Exception as e:
+                logger.debug("Exception in _extract_attribute(): "+str(e))
+                logger.debug("".join(traceback.format_tb(e.__traceback__)))  # print_tb() prints to stderr and returns None
+                logger.error("Could not extract {}.".format(v))
+
+        return d
+
+    if isinstance(attr, (list,tuple)):
+        d=[]
+        for v in attr:
+            try:
+                d.append(_extract_attribute(v, method, date1, date2, db))
+            except Exception as e:
+                logger.debug("Exception in _extract_attribute(): "+str(e))
+                logger.debug("".join(traceback.format_tb(e.__traceback__)))
+                logger.error("Could not extract {}.".format(v))
+
+        return d
+
+    try:
+        d=_extract_attribute(attr, method, date1, date2, db)
+    except Exception as e:
+        logger.debug("Exception in _extract_attribute(): "+str(e))
+        logger.debug("".join(traceback.format_tb(e.__traceback__)))
+        logger.error("Could not extract {}.".format(attr))
+        return None
+
+    return d
+

 ##----------------------------------------------------------------------##
 def findattr(pattern, db="H"):
@@ -349,65 +539,18 @@ def infoattr(attribute, db='H'):

    return info

-##---------------------------------------------------------------------------##
-def ExtrBetweenDates(
-        attribute,
-        dateStart,
-        dateStop=None,
-        db='H',
-        ):
-    """
-    Query attribute data from an archiver database, get all points between dates.
-    Use ExtractBetweenDates.
-
-    Parameters
-    ----------
-    attribute : String
-        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
-
-    dateStart : datetime.datetime, string
-        Start date for extraction. If string, it will be parsed.
-        Example of string format %Y-%m-%d-%H:%M:%S or less precise.
-
-    dateStop : datetime.datetime, string, None
-        Stop date for extraction.
-        If string, it will be parsed.
-        Example of string format %Y-%m-%d-%H:%M:%S or less precise.
-        If None, it takes the current date and time.
-        Default is None (now).
-
-    db: str
-        Which database to look in, 'H' or 'T'.
-
-    Exceptions
-    ----------
-    ValueError
-        The attribute is not found in the database.
-
-    Returns
-    -------
-    [date, value] : array
-        date : numpy.ndarray of datetime.datime objects
-            Dates of the values
-        value : numpy.ndarray
-            Archived values
+##########################################################################
+###                    Module core functions                           ###
+##########################################################################

+def _extract_attribute(attribute, method, date1, date2, db):
+    """
+    Check if exists, check scalar or spectrum and dispatch
    """

-    if not _check_initialized():
-        return
-
-    if not db in ("H", "T"):
-        raise AttributeError("Attribute db should be 'H' or 'T'")
    # Uncapitalize attribute
    attribute = attribute.lower()
-
-    # Check attribute is in database
-    _check_attribute(attribute, db=db)
-
-    # Parse dates
-    dateStart = _dateparse(dateStart)
-    dateStop = _dateparse(dateStop)
+    _check_attribute(attribute, db)

    # Get info about the attribute
    info=infoattr(attribute, db=db)
@@ -425,93 +568,157 @@ def ExtrBetweenDates(
                attribute, info["max_dim_x"]))
            attrtype="vector"

-    # Cut the time horizon in chunks
-    cdates = _chunkerize(attribute, dateStart, dateStop, db)
+    # =============
+    # For now we handle multi dimension the same way as scalar, which will get only the first element
+    if (attrtype=="scalar") or (attrtype=="multi"):
+        if info["data_type"] == '1':
+            # Boolean data type, quick fix
+            dtype=bool
+        else:
+            dtype=float

-    # Arrays to hold every chunks
-    value = []
-    date = []
+        return _extract_scalar(attribute, method, date1, date2, db, dtype)
+    if attrtype=="vector":
+        return _extract_vector(attribute, method, date1, date2, db)

-    # For each date chunk
-    for i_d in range(len(cdates)-1):

-        # =============
-        # For now we handle multi dimension the same way as scalar, which will get only the first element
-        if (attrtype=="scalar") or (attrtype=="multi"):
-            # Inform on retrieval request
-            logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%(
-                attribute,
-                cdates[i_d].strftime(_DBDFMT),
-                cdates[i_d+1].strftime(_DBDFMT))
-                )
+##---------------------------------------------------------------------------##
+def _extract_scalar(attribute, method, date1, date2, db, dtype):
+    """Extract a scalar attribute as a pandas.Series with method 'nearest' or 'between'; None if the command failed."""
+    # =====================
+    if method == "nearest":
+        cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetNearestValue", [
+                                                attribute,
+                                                date1.strftime(_DBDFMT),
+                                                ])
+
+        # Unpack return; a failed command gives None, and None.split raises AttributeError
+        try:
+            _date, _value = cmdreturn.split(';')
+        except (AttributeError, TypeError):
+            logger.error("Could not extract this chunk. Check the device extractor")
+            return None
+
+        # Transform by datatype. The value arrives as text, so every type is
+        # converted here, the same way the 'between' branch converts its arrays.
+        _value = _cast_bool(_value) if dtype is bool else dtype(_value)

+        # Fabricate return pandas.Series
+        d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[_value,], name=attribute)
+
+        return d
+
+    # =====================
+    if method == "between":
+        # Cut the time horizon in chunks
+        cdates = _chunkerize(attribute, date1, date2, db)
+
+        # Array to hold data
+        data = []
+
+        # For each date chunk
+        for i_d in range(len(cdates)-1):
            cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [
                                                    attribute,
                                                    cdates[i_d].strftime(_DBDFMT),
                                                    cdates[i_d+1].strftime(_DBDFMT)
                                                    ])

-            # Check command return
-            if cmdreturn is None:
+
+            # Unpack return; unpacking None (failed command) raises TypeError
+            try:
+                _date, _value = cmdreturn
+            except TypeError:
                logger.error("Could not extract this chunk. Check the device extractor")
                return None

-            # Unpack return
-            _date, _value = cmdreturn

            # Transform to datetime - value arrays
-            # NOTE: it is faster than using pandas.to_datetime()
-            _value = np.asarray(_value, dtype=float)
+            if dtype is bool:
+                _value = _cast_bool(_value)
+            else:
+                _value = np.asarray(_value, dtype=dtype)
+
            if len(_date) > 0:
                _date = _ArrayTimeStampToDatetime(_date/1000.0)

-            value.append(_value)
-            date.append(_date)
+            # Fabricate return pandas.Series
+            data.append(pd.Series(index=_date, data=_value, name=attribute))

-        # =============
-        if attrtype=="vector":
-            logger.info("Perform GetAttDataBetweenDates (%s, %s, %s)"%(
-                                                    attribute,
-                                                    cdates[i_d].strftime(_DBDFMT),
-                                                    cdates[i_d+1].strftime(_DBDFMT)
-                                                    ))
+        # Concatenate chunks
+        return pd.concat(data)

-            [N,], [name,] = _extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDates([
-                attribute,
-                cdates[i_d].strftime(_DBDFMT),
-                cdates[i_d+1].strftime(_DBDFMT)
-                ])
-            N=int(N)
+    # ========================
+    if method == "minmaxmean":

-            # Read the history
-            logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
-            attrHist = _extractors[{'H':0, 'T':1}[db]].attribute_history(name, N)
+        # If we are here, the method is not implemented (NotImplemented is a constant, not an exception)
+        logger.error("Method {} is not implemented for scalars.".format(method))
+        raise NotImplementedError("Method {} is not implemented for scalars.".format(method))

-            # Transform to datetime - value arrays
-            _value = np.empty((N, int(info["max_dim_x"])), dtype=float)
-            _value[:] = np.nan
-            _date = np.empty(N, dtype=object)
-            for i_h in range(N):
-                _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
-                _date[i_h]=attrHist[i_h].time.todatetime()
+##---------------------------------------------------------------------------##
+def _extract_vector(attribute, method, date1, date2, db):

-            # Remove dynamic attribute
-            logger.debug("Remove dynamic attribute %s."%name)
-            _extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name)
+    # Extract a vector attribute as a pandas.Series (only 'nearest' is implemented). Get info about the attribute first
+    info=infoattr(attribute, db=db)

+    # =====================
+    if method == "nearest":
+        # Get nearest does not work with vector.
+        # Make a between date with surrounding dates.
+
+        # Dynamically find a surrounding interval, widened until it holds at least one point
+        cnt=0
+        dt=datetime.timedelta(seconds=10)
+        while cnt<1:
+            logger.debug("Seeking points in {} to {}".format(date1-dt,date1+dt))
+            cnt=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
+                    attribute,
+                    (date1-dt).strftime(_DBDFMT2),
+                    (date1+dt).strftime(_DBDFMT2)
+                    ])
+            dt=dt*1.5
+        logger.debug("Found {} points in a +- {} interval".format(cnt,str(dt/1.5)))
+
+
+        # For vector, we have to use the GetAttxxx commands
+        cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [
+                                                attribute,
+                                                (date1-dt).strftime(_DBDFMT),
+                                                (date1+dt).strftime(_DBDFMT),
+                                                ])
+
+        # Unpack return; unpacking None (failed command) raises TypeError
+        try:
+            [N,], [name,] = cmdreturn
+            N=int(N)
+        except TypeError:
+            logger.error("Could not extract this attribute. Check the device extractor")
+            return None

-            value.append(_value)
-            date.append(_date)
+        # Read the history
+        logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
+        attrHist = _extractors[{'H':0, 'T':1}[db]].attribute_history(name, N)

-    logger.debug("Concatenate chunks")
-    value = np.concatenate(value)
-    date = np.concatenate(date)
+        # Transform to datetime - value arrays
+        _value = np.empty((N, int(info["max_dim_x"])), dtype=float)
+        _value[:] = np.nan
+        _date = np.empty(N, dtype=object)
+        for i_h in range(N):
+            _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
+            _date[i_h]=attrHist[i_h].time.todatetime()

-    logger.debug("Extraction done for %s."%attribute)
-    if attrtype=="vector":
-        return pd.DataFrame(index=date, data=value).dropna(axis=1, how='all')
-    else:
-        return pd.Series(index=date, data=value)
+        # The history is copied: remove the temporary dynamic attribute, as the former code did
+        _extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name)
+        idx=np.argmin(abs(_date-date1))  # Seeking nearest entry
+        logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx]))
+        # Fabricate return pandas.Series
+        d=pd.Series(index=[_date[idx],], data=[_value[idx],], name=attribute)
+
+        return d
+
+    # If we are here, the method is not implemented (NotImplemented is a constant, not an exception)
+    logger.error("Method {} is not implemented for vectors.".format(method))
+    raise NotImplementedError("Method {} is not implemented for vectors.".format(method))


 ##---------------------------------------------------------------------------##
@@ -644,3 +851,7 @@ def ExtrBetweenDates_MinMaxMean(
                "Max":value_max,
                },)

+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+## Initialize on import
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+init()
--- a/README.md
+++ b/README.md
@@ -13,12 +13,9 @@ Usage example, with an ipython prompt
 ```python
 In [1]: import pySoleilControl.ArchiveExtractor as AE

-In [2]: # For now, we need manual initialization of the module
-   ...: AE.init()
-
-In [3]: # Looking for an attribute in HDB
+In [2]: # Looking for an attribute in HDB
   ...: AE.findattr("ans/dg/*dcct*")
-Out[3]: 
+Out[2]: 
 ['ANS/DG/DCCT-CTRL/State',
 'ANS/DG/DCCT-CTRL/Status',
 'ANS/DG/DCCT-CTRL/current',
@@ -26,10 +23,10 @@ Out[3]:
 'ANS/DG/DCCT-CTRL/lifeTime',
 'ANS/DG/DCCT-CTRL/lifeTimeErr']

-In [4]: # Get data between two dates, this return a pandas.Dataframe object
+In [3]: # Get data between two dates, this returns a pandas.Series object
   ...: AE.ExtrBetweenDates('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00')
 INFO:Perform ExtractBetweenDates (ans/dg/dcct-ctrl/current, 2021-12-13 00:00:00, 2021-12-13 12:00:00)
-Out[4]: 
+Out[3]: 
 2021-12-13 00:00:00    450.993568
 2021-12-13 00:00:01    450.981979
 2021-12-13 00:00:02    450.971455
@@ -43,11 +40,11 @@ Out[4]:
 2021-12-13 12:00:00     15.005410
 Length: 42725, dtype: float64

-In [5]: # Get min, max and mean with a 10 minute window
+In [4]: # Get min, max and mean with a 10 minute window
   ...: d=AE.ExtrBetweenDates_MinMaxMean('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00', timeInterval='10m')

-In [6]: d
-Out[6]: 
+In [5]: d
+Out[5]: 
                            Min        Mean         Max
 2021-12-13 00:05:00  449.762286  450.619654  451.617095
 2021-12-13 00:15:00  449.761171  450.676306  451.595391
@@ -64,14 +61,14 @@ Out[6]:
 [72 rows x 3 columns]


-In [7]: # Activate inline matplotlib
+In [6]: # Activate inline matplotlib
   ...: %matplotlib
 Using matplotlib backend: TkAgg

-In [7]: # Simply plot
+In [6]: # Simply plot
   ...: d.plot()

-In [8]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?'
+In [7]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?'
    ...: AE.ExtrBetweenDates?
 Signature: AE.ExtrBetweenDates(attribute, dateStart, dateStop=None, db='H')
 Docstring:
@@ -114,4 +111,4 @@ Type:      function



-```
\ No newline at end of file
+```