From ff9ac78a5206ded84a5f79665ebc5a25da352b3e Mon Sep 17 00:00:00 2001
From: Romain Broucquart <romain.broucquart@synchrotron-soleil.fr>
Date: Mon, 23 May 2022 11:49:17 +0200
Subject: [PATCH] Handle extraction of 1D spectrum

* Add an infoattr function to retrieve information about attribute dimensions.
* When the attribute is a 1D spectrum, switch to GetAttDataBetweenDates
* Return a pandas DataFrame for 1D spectra (see the sketch below)
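
A caller-side sketch of the new behaviour (a minimal example; the extraction
method name betweenDates and the spectrum attribute are assumptions for
illustration, actual names may differ):

    extractor = ArchiveExtractor()  # hypothetical construction
    # Scalar attribute: unchanged, still returns a pandas Series
    s = extractor.betweenDates("test/dg/panda/current",
                               "2022-05-23-00:00:00", "2022-05-23-12:00:00")
    # 1D spectrum attribute: now returns a pandas DataFrame,
    # one column per element of the spectrum
    df = extractor.betweenDates("test/dg/panda/spectrum",
                                "2022-05-23-00:00:00", "2022-05-23-12:00:00")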

Also:
* Add date format examples to the docstrings
* Avoid running the minmaxmean extraction on non-scalar attributes (detected
  via the infoattr dict, sketched below)
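
For reference, a minimal sketch of the infoattr helper as used by the
dimension checks (the max_dim_x / max_dim_y keys come from the extractor
device; the values shown are made up for illustration):

    info = extractor.infoattr("test/dg/panda/current")
    # info is a plain dict of propertyname: propertyvalue strings, e.g.
    # {"max_dim_x": "1", "max_dim_y": "0", ...}
    if int(info["max_dim_x"]) > 1 and int(info["max_dim_y"]) == 0:
        print("attribute is a 1D spectrum")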
---
 core/ArchiveExtractor.py | 150 ++++++++++++++++++++++++++++++++-------
 1 file changed, 125 insertions(+), 25 deletions(-)

diff --git a/core/ArchiveExtractor.py b/core/ArchiveExtractor.py
index 8e0ff33..01f6ca2 100755
--- a/core/ArchiveExtractor.py
+++ b/core/ArchiveExtractor.py
@@ -146,10 +146,12 @@ class ArchiveExtractor:
 
         dateStart : datetime.datetime, string
             Start date for extraction. If string, it will be parsed.
+            Example string format: %Y-%m-%d-%H:%M:%S, or a less precise prefix of it.
 
         dateStop : datetime.datetime, string, None
             Stop date for extraction.
             If string, it will be parsed.
+            Example string format: %Y-%m-%d-%H:%M:%S, or a less precise prefix of it.
             If None, it takes the current date and time.
             Default is None (now).
 
@@ -182,6 +184,22 @@ class ArchiveExtractor:
         # Check attribute is in database
         self._check_attribute(attribute)
 
+        # Get info about the attribute
+        info=self.infoattr(attribute)
+        self.logger.debug("Attribute information \n%s"%info)
+
+        # Detect spectrum
+        attrtype="scalar"
+        if int(info["max_dim_x"]) > 1:
+            if int(info["max_dim_y"]) > 0:
+                self.logger.warning("Attribute %s is a 2D (%s x %s) array. This is poorly handled by this script."%(
+                    attribute, info["max_dim_x"], info["max_dim_y"]))
+                attrtype="multi"
+            else:
+                self.logger.info("Attribute %s is a 1D vector, dimension = %s."%(
+                    attribute, info["max_dim_x"]))
+                attrtype="vector"
+
         # Cut the time horizon in chunks
         cdates = self.chunkerize(attribute, dateStart, dateStop)
 
@@ -192,42 +210,83 @@ class ArchiveExtractor:
         # For each date chunk
         for i_d in range(len(cdates)-1):
 
-            # Inform on retrieval request
-            self.logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%(
-                attribute,
-                cdates[i_d].strftime(DBDFMT),
-                cdates[i_d+1].strftime(DBDFMT))
-                )
+            # =============
+            # For now, multi-dimensional attributes are handled the same way as scalars, which only retrieves the first element
+            if (attrtype=="scalar") or (attrtype=="multi"):
+                # Inform on retrieval request
+                self.logger.info("Perform ExtractBetweenDates (%s, %s, %s)"%(
+                    attribute,
+                    cdates[i_d].strftime(DBDFMT),
+                    cdates[i_d+1].strftime(DBDFMT))
+                    )
+
+                cmdreturn = self._cmd_with_retry("ExtractBetweenDates", [
+                                                        attribute,
+                                                        cdates[i_d].strftime(DBDFMT),
+                                                        cdates[i_d+1].strftime(DBDFMT)
+                                                        ])
+
+                # Check command return
+                if cmdreturn is None:
+                    self.logger.error("Could not extract this chunk. Check the device extractor")
+                    return None
+
+                # Unpack return
+                _date, _value = cmdreturn
+
+                # Transform to datetime - value arrays
+                # NOTE: it is faster than using pandas.to_datetime()
+                _value = np.asarray(_value, dtype=float)
+                if len(_date) > 0:
+                    _date = ArrayTimeStampToDatetime(_date/1000.0)
+
+                value.append(_value)
+                date.append(_date)
+
+            # =============
+            if attrtype=="vector":
+                self.logger.info("Perform GetAttDataBetweenDates (%s, %s, %s)"%(
+                                                        attribute,
+                                                        cdates[i_d].strftime(DBDFMT),
+                                                        cdates[i_d+1].strftime(DBDFMT)
+                                                        ))
+
+                [N,], [name,] = self.extractor.GetAttDataBetweenDates([
+                    attribute,
+                    cdates[i_d].strftime(DBDFMT),
+                    cdates[i_d+1].strftime(DBDFMT)
+                    ])
+                N=int(N)
 
-            cmdreturn = self._cmd_with_retry("ExtractBetweenDates", [
-                                                    attribute,
-                                                    cdates[i_d].strftime(DBDFMT),
-                                                    cdates[i_d+1].strftime(DBDFMT)
-                                                    ])
+                # Read the history
+                self.logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
+                attrHist = self.extractor.attribute_history(name, N)
 
-            # Check command return
-            if cmdreturn is None:
-                logger.error("Could not extract this chunk. Check the device extractor")
-                return None
+                # Transform to datetime - value arrays
+                _value = np.empty((N, int(info["max_dim_x"])), dtype=float)
+                _value[:] = np.nan
+                _date = np.empty(N, dtype=object)
+                for i_h in range(N):
+                    _value[i_h,:attrHist[i_h].dim_x]=attrHist[i_h].value
+                    _date[i_h]=attrHist[i_h].time.todatetime()
 
-            # Unpack return
-            _date, _value = cmdreturn
+                # Remove dynamic attribute
+                self.logger.debug("Remove dynamic attribute %s."%name)
+                self.extractor.RemoveDynamicAttribute(name)
 
-            # Transform to datetime - value arrays
-            # NOTE: it is faster than using pandas.to_datetime()
-            _value = np.asarray(_value, dtype=float)
-            if len(_date) > 0:
-                _date = ArrayTimeStampToDatetime(_date/1000.0)
 
-            value.append(_value)
-            date.append(_date)
+                value.append(_value)
+                date.append(_date)
 
         self.logger.debug("Concatenate chunks")
         value = np.concatenate(value)
         date = np.concatenate(date)
 
         self.logger.debug("Extraction done for %s."%attribute)
-        return pd.Series(index=date, data=value)
+        if attrtype=="vector":
+            return pd.DataFrame(index=date, data=value).dropna(axis=1, how='all')
+        else:
+            return pd.Series(index=date, data=value)
 
 
     ##---------------------------------------------------------------------------##
@@ -249,9 +308,11 @@ class ArchiveExtractor:
 
         dateStart : datetime.datetime, string
             Start date for extraction. If string, it will be parsed.
+            Example string format: %Y-%m-%d-%H:%M:%S, or a less precise prefix of it.
 
         dateStop : datetime.datetime, string
             Stop date for extraction. If string, it will be parsed.
+            Example string format: %Y-%m-%d-%H:%M:%S, or a less precise prefix of it.
             Default is now (datetime.datetime.now())
 
         timeInterval: datetime.timedelta, string
@@ -299,6 +360,16 @@ class ArchiveExtractor:
             self.logger.error("Attribute '%s' is not archived in DB %s"%(attribute, extractor))
             raise ValueError("Attribute '%s' is not archived in DB %s"%(attribute, extractor))
 
+        # Get info about the attribute
+        info=self.infoattr(attribute)
+        self.logger.debug("Attribute information \n%s"%info)
+
+        # Detect spectrum
+        attrtype="scalar"
+        if int(info["max_dim_x"]) > 1:
+            self.logger.error("Attribute is not a scalar. Cannot perform this kind of operation.")
+            return None
+
         # Cut data range in time chunks
         cdates = [dateStart]
         while cdates[-1] < dateStop:
@@ -430,3 +501,32 @@ class ArchiveExtractor:
             cdates=[dateStart, dateStop]
 
         return cdates
+
+    def infoattr(self, attribute):
+        """
+        Get information about an attribute and pack it into a python dict.
+
+        Parameters
+        ----------
+        attribute : String
+            Name of the attribute. Full Tango name, e.g. "test/dg/panda/current".
+
+        Returns
+        -------
+        info : dict
+            Dictionary of propertyname: propertyvalue pairs.
+        """
+        info = dict()
+
+        for func in ("GetAttDefinitionData", "GetAttPropertiesData"):
+            R=getattr(self.extractor, func)(attribute)
+            if R is not None:
+                for i in R:
+                    _s=i.split("::")
+                    info[_s[0]]=_s[1]
+            else:
+                self.logger.warning("Function %s on extractor returned None"%func)
+
+
+        return info
+
-- 
GitLab