diff --git a/ArchiveExtractor.py b/ArchiveExtractor.py index 66d79c0285320e2b99dbdf7616aef447559ac8f0..6e6641547e2f8356adb57afac4a1d140367ac93d 100755 --- a/ArchiveExtractor.py +++ b/ArchiveExtractor.py @@ -1,11 +1,12 @@ """ -Python module for extracting attribute from Arhive Extractor Device. +Python module for extracting attribute from Archive Extractor Device. """ import logging import datetime import numpy as np import PyTango as tango import pandas as pd +import traceback __version__ = "1.0.1" @@ -38,8 +39,16 @@ _DBDFMT2 = "%d-%m-%Y %H:%M:%S" ########################################################################## # Vectorized fromtimestamp function +# NOTE: it is faster than using pandas.to_datetime() _ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp) +# Vectorized bool map dictionary +_ArrayStr2Bool = np.vectorize({ + "true":True, 't':True, + "false":False, 'f':False, + }.get) + + def _check_initialized(): """ Check if the module is initialized. @@ -57,14 +66,16 @@ def _check_initialized(): ##----------------------------------------------------------------------## def _dateparse(datestr): """ - Convenient function to parse date strings. - Global format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise. + Convenient function to parse date or duration strings. + Exact date format is %Y-%m-%d-%H:%M:%S and it can be reduced to be less precise. + Duration format is 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months) If datstr is None, take the actual date and time. Parameters --------- datestr : string Date as a string, format %Y-%m-%d-%H:%M:%S or less precise. 
+ Duration as a string, format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months) Exceptions ---------- @@ -73,35 +84,52 @@ def _dateparse(datestr): Returns ------- - date : datetime.datetime - Parsed date + date : datetime.datetime or datetime.timedelta + Parsed date or duration """ + logger.debug("Parsing date string '%s'"%datestr) + + # Determine date/duration by looking at the last char + if datestr[-1] in "mhdM": + # Duration + logger.debug("Assuming a duration") - if datestr is None: - return datetime.datetime.now() - - # This gives all format that will be tried, in order. - # Stop on first parse success. Raise error if none succeed. - fmt = [ - "%Y-%m-%d-%H:%M:%S", - "%Y-%m-%d-%H:%M", - "%Y-%m-%d-%H", - "%Y-%m-%d", - "%Y-%m", - ] - - date = None - for f in fmt: try: - date = datetime.datetime.strptime(datestr, f) - except ValueError: - continue - else: - break + q=float(datestr[:-1]) + except ValueError as e: + logger.error("Failed to parse date string. Given the last character, a duration was assumed.") + raise Exception("Could not parse argument to a date") from e + + # Convert all in minutes + minutes = q*{'m':1, 'h':60, 'd':60*24, 'm':30*60*24}[datestr[-1]] + + return datetime.timedelta(minutes=minutes) + else: - raise ValueError("Could not parse argument to a date") + # Probably a date string + + # This gives all format that will be tried, in order. + # Stop on first parse success. Raise error if none succeed. 
+ fmt = [ + "%Y-%m-%d-%H:%M:%S", + "%Y-%m-%d-%H:%M", + "%Y-%m-%d-%H", + "%Y-%m-%d", + "%Y-%m", + ] + + date = None + for f in fmt: + try: + date = datetime.datetime.strptime(datestr, f) + except ValueError: + continue + else: + break + else: + raise ValueError("Could not parse argument to a date") - return date + return date ##----------------------------------------------------------------------## def _check_attribute(attribute, db): @@ -204,6 +232,7 @@ def _cmd_with_retry(dp, cmd, arg, retry=2): Whatever the command returns. None if failed after the amount of retries. """ + logger.info("Perform Command {} {}".format(cmd, arg)) for i in range(retry): # Make retrieval request @@ -221,6 +250,48 @@ def _cmd_with_retry(dp, cmd, arg, retry=2): break return cmdreturn + +def _cast_bool(value): + """ + Cast a value, or array of values, to boolean. + Try to assess the input data type. If string, then try to find true or false word inside. + + Parameters: + ----------- + value: string, integer, or array of such + value to convert. + + Return: + boolean: + value or array of boolean. + """ + + # Force to array + value = np.asarray(value) + + # cast back to single value + def castback(v): + if v.shape == (): + return v.item() + return v + + # Simply try to cast to bool first + try: + value = value.astype("bool") + logger.debug("Direct conversion to boolean") + return castback(value) + except ValueError: + # Keep trying to cast + pass + + logger.debug("Try to convert to boolean") + + value = np.char.strip(np.char.lower(value)) + value = _ArrayStr2Bool(value) + + return castback(value) + + ########################################################################## ### Module private variables ### ########################################################################## @@ -243,11 +314,13 @@ def init( Initialize the module. Instanciate tango.DeviceProxy for extractors (TDB and HDB) - HdbExtractorPath, TdbExtractorPath: string - Tango path to the extractors. 
+ Parameters: + ----------- + HdbExtractorPath, TdbExtractorPath: string + Tango path to the extractors. - loglevel: string - loglevel to pass to logging.Logger + loglevel: string + loglevel to pass to logging.Logger """ global _extractors global _AttrTables @@ -274,6 +347,123 @@ def init( _AttrTables = tuple(e.getattnameall() for e in _extractors) logger.debug("HDB: {} TDB: {} attributes counted".format(len(_AttrTables[0]), len(_AttrTables[1]))) +########################################################################## +### Module access functions ### +########################################################################## + +def extract( + attr, + date1, date2=None, + method="nearest", + db='H', + ): + """ + Access function to perform extraction between date1 and date2. + Can extract one or several attributes. + date1 and date2 can both be exact dates, or one of the two can be a duration that is taken relative to the other. + + + Parameters: + ----------- + attr: string, list, dict + Attribute(s) to extract. + If string, extract the given attribute, returning a pandas.Series. + If list, extract attributes and return a list of pandas.Series. + If a dict, extract attributes and return a dict of pandas.Series with same keys. + + date1, date2: string, datetime.datetime, datetime.timedelta, None + Exact date, or duration relative to the other date. + If string, it will be parsed. + A start date can be given with string format '%Y-%m-%d-%H:%M:%S' or less precise (e.g. '2021-02', '2022-11-03', '2022-05-10-21:00', ...). + A duration can be given with string format 'Xu' where X is a number and u is a unit in ('m':minutes, 'h':hours, 'd':days, 'M':months) + A datetime.datetime object or datetime.timedelta object will be used as is. + date2 can be None. In that case it is replaced by the current time. + + method: str + Method of extraction + 'nearest': Retrieve the value nearest to date1; date2 is ignored. + 'between': Retrieve data between date1 and date2. 
+ + db: str + Which database to look in, 'H' or 'T'. + + """ + + ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ + # Perform a few sanity checks + if not _check_initialized(): + # Stop here, the function has produced a message if necessary + return + + if not db in ("H", "T"): + raise ValueError("Attribute 'db' should be 'H' or 'T'") + + + allowedmethods=("nearest", "between", "minmaxmean") + if not method in allowedmethods: + raise ValueError("Attribute 'method' should be in {}".format(str(allowedmethods))) + + ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ + # Work with dates + if not type(date1) in (datetime.datetime, datetime.timedelta): + date1 = _dateparse(date1) + if date2 is None: + date2 = datetime.datetime.now() + else: + if not type(date2) in (datetime.datetime, datetime.timedelta): + date2 = _dateparse(date2) + + if not datetime.datetime in (type(date1), type(date2)): + logger.error("One of date1 date2 should be an exact date.\nGot {} {}".format(date1, date2)) + raise ValueError("date1 and date2 not valid") + + # Use timedelta relative to the other date. 
date1 is always before date2 + if type(date1) is datetime.timedelta: + date1 = date2-date1 + if type(date2) is datetime.timedelta: + date2 = date1+date2 + + if date1 > date2: + logger.error("date1 must precede date2.\nGot {} {}".format(date1, date2)) + raise ValueError("date1 and date2 not valid") + + ## _-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ + # Perform extraction and return + + if type(attr) is dict: + d=dict() + for k,v in attr.items(): + try: + d.update({k:_extract_attribute(v, method, date1, date2, db)}) + except Exception as e: + logger.debug("Exception in _extract_attribute(): "+str(e)) + logger.debug(traceback.print_tb(e.__traceback__)) + logger.error("Could not extract {}.".format(v)) + + return d + + if type(attr) in (list,tuple): + d=[] + for v in attr: + try: + d.append(_extract_attribute(v, method, date1, date2, db)) + except Exception as e: + logger.debug("Exception in _extract_attribute(): "+str(e)) + logger.debug(traceback.print_tb(e.__traceback__)) + logger.error("Could not extract {}.".format(v)) + + return d + + try: + d=_extract_attribute(attr, method, date1, date2, db) + except Exception as e: + logger.debug("Exception in _extract_attribute(): "+str(e)) + logger.debug(traceback.print_tb(e.__traceback__)) + logger.error("Could not extract {}.".format(attr)) + return None + + return d + ##----------------------------------------------------------------------## def findattr(pattern, db="H"): @@ -349,65 +539,18 @@ def infoattr(attribute, db='H'): return info -##---------------------------------------------------------------------------## -def ExtrBetweenDates( - attribute, - dateStart, - dateStop=None, - db='H', - ): - """ - Query attribute data from an archiver database, get all points between dates. - Use ExtractBetweenDates. - - Parameters - ---------- - attribute : String - Name of the attribute. Full Tango name i.e. "test/dg/panda/current". 
- - dateStart : datetime.datetime, string - Start date for extraction. If string, it will be parsed. - Example of string format %Y-%m-%d-%H:%M:%S or less precise. - - dateStop : datetime.datetime, string, None - Stop date for extraction. - If string, it will be parsed. - Example of string format %Y-%m-%d-%H:%M:%S or less precise. - If None, it takes the current date and time. - Default is None (now). - - db: str - Which database to look in, 'H' or 'T'. - - Exceptions - ---------- - ValueError - The attribute is not found in the database. - - Returns - ------- - [date, value] : array - date : numpy.ndarray of datetime.datime objects - Dates of the values - value : numpy.ndarray - Archived values +########################################################################## +### Module core functions ### +########################################################################## +def _extract_attribute(attribute, method, date1, date2, db): + """ + Check if exists, check scalar or spectrum and dispatch """ - if not _check_initialized(): - return - - if not db in ("H", "T"): - raise AttributeError("Attribute db should be 'H' or 'T'") # Uncapitalize attribute attribute = attribute.lower() - - # Check attribute is in database - _check_attribute(attribute, db=db) - - # Parse dates - dateStart = _dateparse(dateStart) - dateStop = _dateparse(dateStop) + _check_attribute(attribute, db) # Get info about the attribute info=infoattr(attribute, db=db) @@ -425,93 +568,157 @@ def ExtrBetweenDates( attribute, info["max_dim_x"])) attrtype="vector" - # Cut the time horizon in chunks - cdates = _chunkerize(attribute, dateStart, dateStop, db) + # ============= + # For now we handle multi dimension the same way as scalar, which will get only the first element + if (attrtype=="scalar") or (attrtype=="multi"): + if info["data_type"] == '1': + # Boolean data type, quick fix + dtype=bool + else: + dtype=float - # Arrays to hold every chunks - value = [] - date = [] + return 
_extract_scalar(attribute, method, date1, date2, db, dtype) + if attrtype=="vector": + return _extract_vector(attribute, method, date1, date2, db) - # For each date chunk - for i_d in range(len(cdates)-1): - # ============= - # For now we handle multi dimension the same way as scalar, which will get only the first element - if (attrtype=="scalar") or (attrtype=="multi"): - # Inform on retrieval request - logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%( - attribute, - cdates[i_d].strftime(_DBDFMT), - cdates[i_d+1].strftime(_DBDFMT)) - ) +##---------------------------------------------------------------------------## +def _extract_scalar(attribute, method, date1, date2, db, dtype): + + # ===================== + if method == "nearest": + cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetNearestValue", [ + attribute, + date1.strftime(_DBDFMT), + ]) + + # Unpack return + try: + _date, _value = cmdreturn.split(';') + except TypeError: + logger.error("Could not extract this chunk. Check the device extractor") + return None + + # Transform by datatype + if dtype is bool: + _value = _cast_bool(_value) + # Fabricate return pandas.Series + d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[_value,], name=attribute) + + return d + + # ===================== + if method == "between": + # Cut the time horizon in chunks + cdates = _chunkerize(attribute, date1, date2, db) + + # Array to hold data + data = [] + + # For each date chunk + for i_d in range(len(cdates)-1): cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [ attribute, cdates[i_d].strftime(_DBDFMT), cdates[i_d+1].strftime(_DBDFMT) ]) - # Check command return - if cmdreturn is None: + + # Unpack return + try: + _date, _value = cmdreturn + except TypeError: logger.error("Could not extract this chunk. 
Check the device extractor") return None - # Unpack return - _date, _value = cmdreturn # Transform to datetime - value arrays - # NOTE: it is faster than using pandas.to_datetime() - _value = np.asarray(_value, dtype=float) + if dtype is bool: + _value = _cast_bool(_value) + else: + _value = np.asarray(_value, dtype=dtype) + if len(_date) > 0: _date = _ArrayTimeStampToDatetime(_date/1000.0) - value.append(_value) - date.append(_date) + # Fabricate return pandas.Series + data.append(pd.Series(index=_date, data=_value, name=attribute)) - # ============= - if attrtype=="vector": - logger.info("Perform GetAttDataBetweenDates (%s, %s, %s)"%( - attribute, - cdates[i_d].strftime(_DBDFMT), - cdates[i_d+1].strftime(_DBDFMT) - )) + # Concatenate chunks + return pd.concat(data) - [N,], [name,] = _extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDates([ - attribute, - cdates[i_d].strftime(_DBDFMT), - cdates[i_d+1].strftime(_DBDFMT) - ]) - N=int(N) + # ======================== + if method == "minmaxmean": - # Read the history - logger.debug("Retrieve history of %d values. 
Dynamic attribute named %s."%(N, name)) - attrHist = _extractors[{'H':0, 'T':1}[db]].attribute_history(name, N) + # If we are here, the method is not implemented + logger.error("Method {} is not implemented for scalars.".format(method)) + raise NotImplemented - # Transform to datetime - value arrays - _value = np.empty((N, int(info["max_dim_x"])), dtype=float) - _value[:] = np.nan - _date = np.empty(N, dtype=object) - for i_h in range(N): - _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value - _date[i_h]=attrHist[i_h].time.todatetime() +##---------------------------------------------------------------------------## +def _extract_vector(attribute, method, date1, date2, db): - # Remove dynamic attribute - logger.debug("Remove dynamic attribute %s."%name) - _extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name) + # Get info about the attribute + info=infoattr(attribute, db=db) + # ===================== + if method == "nearest": + # Get nearest does not work with vectors. + # Make a between-dates query with surrounding dates. + + # Dynamically find the surrounding interval + cnt=0 + dt=datetime.timedelta(seconds=10) + while cnt<1: + logger.debug("Seeking points in {} to {}".format(date1-dt,date1+dt)) + cnt=_extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([ + attribute, + (date1-dt).strftime(_DBDFMT2), + (date1+dt).strftime(_DBDFMT2) + ]) + dt=dt*1.5 + logger.debug("Found {} points in a +- {} interval".format(cnt,str(dt/1.5))) + + + # For vectors, we have to use the GetAttxxx commands + cmdreturn = _cmd_with_retry(_extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [ + attribute, + (date1-dt).strftime(_DBDFMT), + (date1+dt).strftime(_DBDFMT), + ]) + + # Unpack return + try: + [N,], [name,] = cmdreturn + N=int(N) + except TypeError: + logger.error("Could not extract this attribute. Check the device extractor") + return None - value.append(_value) - date.append(_date) + # Read the history + logger.debug("Retrieve history of %d values. 
Dynamic attribute named %s."%(N, name)) + attrHist = _extractors[{'H':0, 'T':1}[db]].attribute_history(name, N) - logger.debug("Concatenate chunks") - value = np.concatenate(value) - date = np.concatenate(date) + # Transform to datetime - value arrays + _value = np.empty((N, int(info["max_dim_x"])), dtype=float) + _value[:] = np.nan + _date = np.empty(N, dtype=object) + for i_h in range(N): + _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value + _date[i_h]=attrHist[i_h].time.todatetime() - logger.debug("Extraction done for %s."%attribute) - if attrtype=="vector": - return pd.DataFrame(index=date, data=value).dropna(axis=1, how='all') - else: - return pd.Series(index=date, data=value) + # Seeking nearest entry + idx=np.argmin(abs(_date-date1)) + logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx])) + + # Fabricate return pandas.Series + d=pd.Series(index=[_date[idx],], data=[_value[idx],], name=attribute) + + return d + + # If we are here, the method is not implemented + logger.error("Method {} is not implemented for vectors.".format(method)) + raise NotImplemented ##---------------------------------------------------------------------------## @@ -644,3 +851,7 @@ def ExtrBetweenDates_MinMaxMean( "Max":value_max, },) +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## +## Initialize on import +## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## +init() diff --git a/README.md b/README.md index 64b79542070294e31984b68bdc9306988570a93c..4098332b528b5a84659e7501ef324665f656bfae 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,9 @@ Usage example, with an ipython prompt ```python In [1]: import pySoleilControl.ArchiveExtractor as AE -In [2]: # For now, we need manual initialization of the module - ...: AE.init() - -In [3]: # Looking for an attribute in HDB +In [2]: # Looking for an attribute in HDB ...: AE.findattr("ans/dg/*dcct*") -Out[3]: +Out[2]: ['ANS/DG/DCCT-CTRL/State', 'ANS/DG/DCCT-CTRL/Status', 
'ANS/DG/DCCT-CTRL/current', @@ -26,10 +23,10 @@ Out[3]: 'ANS/DG/DCCT-CTRL/lifeTime', 'ANS/DG/DCCT-CTRL/lifeTimeErr'] -In [4]: # Get data between two dates, this return a pandas.Dataframe object +In [3]: # Get data between two dates, this returns a pandas.Series object ...: AE.ExtrBetweenDates('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00') INFO:Perform ExtractBetweenDates (ans/dg/dcct-ctrl/current, 2021-12-13 00:00:00, 2021-12-13 12:00:00) -Out[4]: +Out[3]: 2021-12-13 00:00:00 450.993568 2021-12-13 00:00:01 450.981979 2021-12-13 00:00:02 450.971455 @@ -43,11 +40,11 @@ Out[4]: 2021-12-13 12:00:00 15.005410 Length: 42725, dtype: float64 -In [5]: # Get min, max and mean with a 10 minute window +In [4]: # Get min, max and mean with a 10-minute window ...: d=AE.ExtrBetweenDates_MinMaxMean('ANS/DG/DCCT-CTRL/current', '2021-12-13', '2021-12-13-12:00', timeInterval='10m') -In [6]: d -Out[6]: +In [5]: d +Out[5]: Min Mean Max 2021-12-13 00:05:00 449.762286 450.619654 451.617095 2021-12-13 00:15:00 449.761171 450.676306 451.595391 @@ -64,14 +61,14 @@ Out[6]: [72 rows x 3 columns] -In [7]: # Activate inline matplotlib +In [6]: # Activate inline matplotlib ...: %matplotlib Using matplotlib backend: TkAgg -In [7]: # Simply plot +In [6]: # Simply plot ...: d.plot() -In [8]: # ipython prompt supports autocompletion. The doc of function can be quickly read by adding a '?' +In [7]: # ipython prompt supports autocompletion. The doc of a function can be quickly read by adding a '?' ...: AE.ExtrBetweenDates? Signature: AE.ExtrBetweenDates(attribute, dateStart, dateStop=None, db='H') Docstring: @@ -114,4 +111,4 @@ Type: function -``` \ No newline at end of file +```