On the way for module style

47234c7d · BRONES Romain · e7d2b3ae · 47234c7d · 47234c7d · 47234c7d
Commit 47234c7d authored 1 year ago by BRONES Romain
--- a/ArchiveExtractor/Access.py
+++ b/ArchiveExtractor/Access.py
+import logging
+import datetime
+import numpy as np
+import tango
+import pandas as pd
+import traceback
+import ArchiveExtractor.Amenities as aea
+import ArchiveExtractor.Core as aec
+
+##########################################################################
+###                 Install logger for the module                      ###
+##########################################################################
+logger = logging.getLogger(__name__)
+#logger.setLevel(getattr(logging, logger.upper()))
+
+if not logger.hasHandlers():
+    # No handlers, create one
+    sh = logging.StreamHandler()
+    sh.setLevel(logger.level)
+    sh.setFormatter(logging.Formatter("%(levelname)s:%(message)s"))
+    logger.addHandler(sh)
+
+
+##########################################################################
+###                  Module private variables                          ###
+##########################################################################
+# Tuple of extractor for HDB and TDB
+_extractors = (None, None)
+
+# Tuple for attribute tables
+_AttrTables = (None, None)
+
+##########################################################################
+###                Module initialisation functions                     ###
+##########################################################################
+
+def init(
+        HdbExtractorPath="archiving/hdbextractor/2",
+        TdbExtractorPath="archiving/tdbextractor/2",
+        loglevel="info",
+            ):
+    """
+    Initialize the module.
+    Instanciate tango.DeviceProxy for extractors (TDB and HDB)
+
+    Parameters:
+    -----------
+    HdbExtractorPath, TdbExtractorPath: string
+        Tango path to the extractors.
+
+    loglevel: string
+        loglevel to pass to logging.Logger
+    """
+    global _extractors
+    global _AttrTables
+
+    _extractors = (None, None)
+    _AttrTables = (None, None)
+
+    try:
+        logger.setLevel(getattr(logging, loglevel.upper()))
+    except AttributeError:
+        logger.error("Wrong log level specified: {}".format(loglevel.upper()))
+
+    logger.debug("Instanciating extractors device proxy...")
+
+    _extractors = (tango.DeviceProxy(HdbExtractorPath), tango.DeviceProxy(TdbExtractorPath))
+    logger.debug("{} and {} instanciated.".format(*_extractors))
+
+    logger.debug("Configuring extractors device proxy...")
+    for e in _extractors:
+        # set timeout to 3 sec
+        e.set_timeout_millis(3000)
+
+    logger.debug("Filling attributes lookup tables...")
+    _AttrTables = tuple(e.getattnameall() for e in _extractors)
+    logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1])))
+
+##########################################################################
+###                    Module access functions                         ###
+##########################################################################
+
+def extract(
+        attr,
+        date1, date2=None,
+        method="nearest",
+        db='H',
+        ):
+    """
+    Access function to perform extraction between date1 and date2.
+    Can extract one or several attributes.
+    date1 and date2 can be both exact date, or one of two can be a time interval that will be taken relative to the other.
+
+
+    Parameters:
+    -----------
+    attr: string, list, dict
+        Attribute(s) to extract.
+        If string, extract the given attribute, returning a pandas.Series.
+        If list, extract attributes and return a list of pandas.Series.
+        If a dict, extract attributes and return a dict of pandas.Series with same keys.
+
+    date1, date2: string, datetime.datetime, datetime.timedelta, None
+        Exact date, or duration relative to date2.
+        If string, it will be parsed.
+        A start date can be given with string format '%Y-%m-%d-%H:%M:%S' or less precise (ie '2021-02', '2022-11-03' '2022-05-10-21:00'.i..).
+        A duration can be given with string format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
+        A datetime.datetime object or datetime.timedelta object will be used as is.
+        date2 can be None. In that case it is replaced by the current time.
+
+    method: str
+        Method of extraction
+            'nearest': Retrieve nearest value of date1, date2 is ignored.
+            'between': Retrive data between date1 and date2.
+
+    db: str
+        Which database to look in, 'H' or 'T'.
+
+    """
+
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #    Perform a few sanity checks
+    if not aea._check_initialized():
+        # Stop here, the function has produced a message if necessary
+        return
+
+    if not db in ("H", "T"):
+        raise ValueError("Attribute 'db' should be 'H' or 'T'")
+
+
+    allowedmethods=("nearest", "between", "minmaxmean")
+    if not method in allowedmethods:
+        raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods)))
+
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #     Work with dates
+    if not type(date1) in (datetime.datetime, datetime.timedelta):
+        date1 = aea._dateparse(date1)
+    if date2 is None:
+        date2 = datetime.datetime.now()
+    else:
+        if not type(date2) in (datetime.datetime, datetime.timedelta):
+            date2 = aea._dateparse(date2)
+
+    if not datetime.datetime in (type(date1), type(date2)):
+        logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2))
+        raise ValueError("date1 and date2 not valid")
+
+    # Use timedelta relative to the other date. date1 is always before date2
+    if type(date1) is datetime.timedelta:
+        date1 = date2-date1
+    if type(date2) is datetime.timedelta:
+        date2 = date1+date2
+
+    if  date1 > date2:
+        logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2))
+        raise ValueError("date1 and date2 not valid")
+
+    ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
+    #      Perform extraction and return
+
+    if type(attr) is dict:
+        d=dict()
+        for k,v in attr.items():
+            try:
+                d.update({k:aec._extract_attribute(v, method, date1, date2, db)})
+            except Exception as e:
+                logger.debug("Exception in _extract_attribute(): "+str(e))
+                logger.debug(traceback.print_tb(e.__traceback__))
+                logger.error("Could not extract {}.".format(v))
+
+        return d
+
+    if type(attr) in (list,tuple):
+        d=[]
+        for v in attr:
+            try:
+                d.append(aec._extract_attribute(v, method, date1, date2, db))
+            except Exception as e:
+                logger.debug("Exception in _extract_attribute(): "+str(e))
+                logger.debug(traceback.print_tb(e.__traceback__))
+                logger.error("Could not extract {}.".format(v))
+
+        return d
+
+    try:
+        d=aec._extract_attribute(attr, method, date1, date2, db)
+    except Exception as e:
+        logger.debug("Exception in _extract_attribute(): "+str(e))
+        logger.debug(traceback.print_tb(e.__traceback__))
+        logger.error("Could not extract {}.".format(attr))
+        return None
+
+    return d
+
+
+##----------------------------------------------------------------------##
+def findattr(pattern, db="H"):
+    """
+    Search for an attribute path using the pattern given.
+    Case insensitive.
+
+    Parameters:
+    -----------
+    pattern: str
+        Pattern to search, wildchar * accepted.
+        example "dg*dcct*current"
+
+    db: str
+        Which database to look in, 'H' or 'T'.
+
+    Returns:
+    --------
+    results: (str,)
+        List of string match
+    """
+    if not aea._check_initialized():
+        return
+
+    if not db in ("H", "T"):
+        raise AttributeError("Attribute db should be 'H' or 'T'")
+
+    global _AttrTables
+
+    keywords=pattern.lower().split('*')
+
+    # Select DB
+    attr_table = _AttrTables[{'H':0, 'T':1}[db]]
+
+    matches = [attr for attr in attr_table if all(k in attr.lower() for k in keywords)]
+
+    return matches
+
+##----------------------------------------------------------------------##
+def infoattr(attribute, db='H'):
+    """
+    Get informations for an attribute and pack it into a python dict.
+
+    Parameters
+    ----------
+    attribute : String
+        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
+
+    db: str
+        Which database to look in, 'H' or 'T'.
+
+    Returns
+    -------
+    info : dict
+        Dictionnary of propertyname:propertyvalue
+    """
+    if not aea._check_initialized():
+        return
+
+    if not db in ("H", "T"):
+        raise AttributeError("Attribute db should be 'H' or 'T'")
+
+    info = dict()
+
+    for func in ("GetAttDefinitionData", "GetAttPropertiesData"):
+        R=getattr(_extractors[{'H':0, 'T':1}[db]], func)(attribute)
+        if not R is None:
+            for i in R:
+                _s=i.split("::")
+                info[_s[0]]=_s[1]
+        else:
+            logger.warning("Function %s on extractor returned None"%func)
+
+    return info
+
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+## Initialize on import
+## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
+init()
--- a/ArchiveExtractor/Amenities.py
+++ b/ArchiveExtractor/Amenities.py
+
+
+##########################################################################
+###               Commodity private variables                          ###
+##########################################################################
+
+# Extractor date format for GetAttDataBetweenDates
+_DBDFMT = "%Y-%m-%d %H:%M:%S"
+
+# Extractor date format for GetNearestValue
+_DBDFMT2 = "%d-%m-%Y %H:%M:%S"
+
+##########################################################################
+###               Commodity private functions                          ###
+##########################################################################
+
+# Vectorized fromtimestamp function
+# NOTE: it is faster than using pandas.to_datetime()
+_ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)
+
+# Vectorized bool map dictionnary
+_ArrayStr2Bool = np.vectorize({
+    "true":True, 't':True,
+    "false":False, 'f':False,
+    }.get)
+
+
+def _check_initialized():
+    """
+    Check if the module is initialized.
+
+    Returns
+    -------
+    success : boolean
+    """
+    global _extractors
+    if None in _extractors:
+        logger.error("Module {0} is not initialied. You should run {0}.init().".format(__name__))
+        return False
+    return True
+
+##----------------------------------------------------------------------##
+def _dateparse(datestr):
+    """
+    Convenient function to parse date or duration strings.
+    Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise.
+    Duration format is 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
+    If datstr is None, take the actual date and time.
+
+    Parameters
+    ---------
+    datestr : string
+        Date as a string, format %Y-%m-%d-%H:%M:%S or less precise.
+        Duration as a string, format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months)
+
+    Exceptions
+    ----------
+    ValueError
+        If the parsing failed.
+
+    Returns
+    -------
+    date : datetime.datetime or datetime.timedelta
+        Parsed date or duration
+    """
+    logger.debug("Parsing date string '%s'"%datestr)
+
+    # Determine date/duration by looking at the last char
+    if datestr[-1] in "mhdM":
+        # Duration
+        logger.debug("Assuming a duration")
+
+        try:
+            q=float(datestr[:-1])
+        except ValueError as e:
+            logger.error("Failed to parse date string. Given the last character, a duration was assumed.")
+            raise Exception("Could not parse argument to a date") from e
+
+        # Convert all in minutes
+        minutes = q*{'m':1, 'h':60, 'd':60*24, 'm':30*60*24}[datestr[-1]]
+
+        return datetime.timedelta(minutes=minutes)
+
+    else:
+        # Probably a date string
+
+        # This gives all format that will be tried, in order.
+        # Stop on first parse success. Raise error if none succeed.
+        fmt = [
+            "%Y-%m-%d-%H:%M:%S",
+            "%Y-%m-%d-%H:%M",
+            "%Y-%m-%d-%H",
+            "%Y-%m-%d",
+            "%Y-%m",
+            ]
+
+        date = None
+        for f in fmt:
+            try:
+                date = datetime.datetime.strptime(datestr, f)
+            except ValueError:
+                continue
+            else:
+                break
+        else:
+            raise ValueError("Could not parse argument to a date")
+
+        return date
+
+##----------------------------------------------------------------------##
+def _check_attribute(attribute, db):
+    """
+    Check that the attribute is in the database
+
+    Parameters
+    ----------
+    attribute : String
+        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
+
+    db: str
+        Which database to look in, 'H' or 'T'.
+    """
+    global _extractors
+
+    logger.debug("Check that %s is archived."%attribute)
+    if not _extractors[{'H':0, 'T':1}[db]].IsArchived(attribute):
+        logger.error("Attribute '%s' is not archived in DB %s"%(attribute, _extractors[{'H':0, 'T':1}[db]]))
+        raise ValueError("Attribute '%s' is not archived in DB %s"%(attribute, _extractors[{'H':0, 'T':1}[db]]))
+
+##----------------------------------------------------------------------##
+def _chunkerize(attribute, dateStart, dateStop, db, Nmax=100000):
+    """
+
+    Parameters
+    ----------
+    attribute : String
+        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
+
+    dateStart : datetime.datetime
+        Start date for extraction.
+
+    dateStop : datetime.datetime
+        Stop date for extraction.
+
+    db: str
+        Which database to look in, 'H' or 'T'.
+
+    Nmax: int
+        Max number of atoms in one chunk. Default 100000.
+
+    Returns
+    -------
+    cdates : list
+        List of datetime giving the limit of each chunks.
+        For N chunks, there is N+1 elements in cdates, as the start and end boundaries are included.
+    """
+    info=infoattr(attribute, db=db)
+    logger.debug("Attribute information \n%s"%info)
+
+    # Get the number of points
+    N=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
+            attribute,
+            dateStart.strftime(_DBDFMT2),
+            dateStop.strftime(_DBDFMT2)
+            ])
+    logger.debug("On the period, there is %d entries"%N)
+
+    dx=int(info["max_dim_x"])
+    if dx > 1:
+        logger.debug("Attribute is a vector with max dimension = %s"%dx)
+        N=N*dx
+
+    # If data chunk is too much, we need to cut it
+    if N > Nmax:
+        dt = (dateStop-dateStart)/(N//Nmax)
+        cdates = [dateStart]
+        while cdates[-1] < dateStop:
+            cdates.append(cdates[-1]+dt)
+        cdates[-1] = dateStop
+        logger.debug("Cutting access to %d little chunks of time, %s each."%(len(cdates)-1, dt))
+    else:
+        cdates=[dateStart, dateStop]
+
+    return cdates
+
+##----------------------------------------------------------------------##
+def _cmd_with_retry(dp, cmd, arg, retry=2):
+    """
+    Run a command on tango.DeviceProxy, retrying on DevFailed.
+
+    Parameters
+    ----------
+    dp: tango.DeviceProxy
+        Device proxy to try command onto.
+
+    cmd : str
+        Command to executte on the extractor
+
+    arg : list
+        Attribute to pass to the command
+
+    retry : int
+        Number of command retry on DevFailed
+
+    Returns
+    -------
+    cmdreturn :
+        Whatever the command returns.
+        None if failed after the amount of retries.
+    """
+    logger.info("Perform Command {} {}".format(cmd, arg))
+
+    for i in range(retry):
+        # Make retrieval request
+        logger.debug("Execute %s (%s)"%(cmd, arg))
+        try:
+            cmdreturn = getattr(dp, cmd)(arg)
+        except tango.DevFailed as e:
+            logger.warning("The extractor device returned the following error:")
+            logger.warning(e)
+            if  i == retry-1:
+                logger.error("Could not execute command %s (%s). Check the device extractor"%(cmd, arg))
+                return None
+            logger.warning("Retrying...")
+            continue
+        break
+    return cmdreturn
+
+
+def _cast_bool(value):
+    """
+    Cast a value, or array of values, to boolean.
+    Try to assess the input data type. If string, then try to find true or false word inside.
+
+    Parameters:
+    -----------
+    value: string, integer, or array of such
+        value to convert.
+
+    Return:
+    boolean:
+        value or array of boolean.
+    """
+
+    # Force to array
+    value = np.asarray(value)
+
+    # cast back to single value
+    def castback(v):
+        if v.shape == ():
+            return v.item()
+        return v
+
+    # Simply try to cast to bool first
+    try:
+        value = value.astype("bool")
+        logger.debug("Direct conversion to boolean")
+        return castback(value)
+    except ValueError:
+        # Keep trying to cast
+        pass
+
+    logger.debug("Try to convert to boolean")
+
+    value = np.char.strip(np.char.lower(value))
+    value = _ArrayStr2Bool(value)
+
+    return castback(value)
+
--- a/ArchiveExtractor.py
+++ b/ArchiveExtractor.py
--- a/ArchiveExtractor/__init__.py
+++ b/ArchiveExtractor/__init__.py
+"""
+Python module for extracting attribute from Archive Extractor Device.
+"""
+
+__version__ = "AUTOVERSIONREPLACE"
+
+__all__ = ["Access", ]
--- a/__init__.py
+++ b/__init__.py