First modification towards a module architecture

* ArchiveExtractor is now a class
* CLI code is moved to a new script

First modification towards a module architecture
ccbef0ca · BRONES Romain · 6c3923c2 · ccbef0ca · ccbef0ca
Commit ccbef0ca authored Oct 13, 2021 by BRONES Romain
--- a/ArchiveExtractor.py
+++ b/ArchiveExtractor.py
@@ -13,6 +13,7 @@ import PyTango as tango

 __version__ = "1.0.1"

+class ArchiveExtractor:

    ##########################################################################
    """ Commodity variables """
@@ -23,9 +24,57 @@ DBDFMT = "%Y-%m-%d %H:%M:%S"
    # Extractor date format for GetNearestValue
    DBDFMT2 = "%d-%m-%Y %H:%M:%S"

+    # Vectorized fromtimestamp function
    ArrayTimeStampToDatetime = np.vectorize(datetime.datetime.fromtimestamp)

+    # Max number of point per extraction chunks
+    Nmax = 100000
+
+    ##########################################################################
+    def __init__(
+            self,
+            ExtractorKind='H', ExtractorNumber=2,
+            ExtractorPath=None,
+            logger=logging.getLogger("ArchiveExtractor")
+            ):
+        """
+        Constructor function. Opens a Tango DeviceProxy to the extractor device.
+
+        Parameters
+        ----------
+        ExtractorKind: char
+            Either 'H' or 'T' for HDB or TDB.
+
+        ExtractorNumber: int
+            Number of the archive extractor instance to use.
+
+        ExtractorPath: string
+            Tango path to the extractor.
+            If this argument is given, it takes precedence over ExtractorKind and ExtractorNumber.
+
+        logger: logging.Logger
+            Logger object to use
+
+        Return
+        ------
+        ArchiveExtractor
+        """
+
+        # Get logger
+        self.logger = logger
+
+        #######################################################
+        # Select Extractor: build the default Tango path when none is given
+        if ExtractorPath is None:
+            ExtractorPath = "archiving/%sDBExtractor/%d"%(ExtractorKind, ExtractorNumber)
+        self.extractor = tango.DeviceProxy(ExtractorPath)
+
+        # Give the DB extractor 3 seconds timeout
+        self.extractor.set_timeout_millis(3000)
+        self.logger.debug("Archive Extractor %s used."%self.extractor.name())
+
    ##---------------------------------------------------------------------------##
+    @staticmethod
    def dateparse(datestr):
        """
        Convenient function to parse date strings.
@@ -46,8 +95,9 @@ def dateparse(datestr):
        date : datetime.datetime
            Parsed date
        """
-    logger.info("Parse date '%s'"%datestr)

+        # This gives all format that will be tried, in order.
+        # Stop on first parse success. Raise error if none succeed.
        fmt = [
            "%Y-%m-%d-%H:%M:%S",
            "%Y-%m-%d-%H:%M",
@@ -58,30 +108,32 @@ def dateparse(datestr):

        date = None
        for f in fmt:
-        logger.debug("Try format '%s'"%f)
            try:
                date = datetime.datetime.strptime(datestr, f)
            except ValueError:
-            logger.debug("Parsing failed")
-
-    if date is None:
-        logger.error("Could not parse date")
-        raise ValueError
+                continue
+            else:
+                break
+        else:
+            raise ValueError("Could not parse argument to a date")

        return date

+
    ##---------------------------------------------------------------------------##
-def query_ADB_BetweenDates(attr,
+    def evalPoints(
+            self,
+            attribute,
            dateStart,
-              dateStop=datetime.datetime.now(),
-              extractor="archiving/TDBExtractor/4"):
+            dateStop,
+            ):
        """
-    Query attribute data from an archiver database, get all points between dates.
-    Use ExtractBetweenDates.
+        Estimate the number of points for the attribute on the date range.
+        Also checks that the attribute is archived by the extractor.

        Parameters
        ----------
-    attr : String
+        attribute : String
            Name of the attribute. Full Tango name i.e. "test/dg/panda/current".

        dateStart : datetime.datetime
@@ -91,60 +143,92 @@ def query_ADB_BetweenDates(attr,
            Stop date for extraction.
            Default is now (datetime.datetime.now())

-    extractor : String
-        Name of the DB Extractor device.
-        Default is "archiving/TDBExtractor/4"
-
        Exceptions
        ----------
        ValueError
            The attribute is not found in the database.

-    Returns
-    -------
-    [date, value] : array
-        date : numpy.ndarray of datetime.datime objects
-            Dates of the values
-        value : numpy.ndarray
-            Archived values
-
-    """
-
-    # Max number of point per extraction chunks
-    Nmax = 100000
+        NotImplementedError
+            The archive mode returned by the DB is not handled.

-    # Device Proxy to DB
-    logger.debug("Instantiate proxy to %s"%extractor)
-    ADB = tango.DeviceProxy(extractor)

-    # Give the DB extractor 3 seconds timeout
-    ADB.set_timeout_millis(3000)
+        Return
+        ------
+        N: float
+            Estimated number of points on the date range (duration / sampling period).

+        """
        # Check that the attribute is in the database
-    logger.debug("Check that %s is archived."%attr)
-    if not ADB.IsArchived(attr):
-        logger.error("Attribute '%s' is not archived in DB %s"%(attr, extractor))
-        raise ValueError("Attribute '%s' is not archived in DB %s"%(attr, extractor))
+        self.logger.debug("Check that %s is archived."%attribute)
+        if not self.extractor.IsArchived(attribute):
+            self.logger.error("Attribute '%s' is not archived in DB %s"%(attribute, self.extractor.name()))
+            raise ValueError("Attribute '%s' is not archived in DB %s"%(attribute, self.extractor.name()))

        # Get its sampling period in seconds
-    req=ADB.GetArchivingMode(attr)
-    logger.debug("GetArchivingMode: "+str(req))
+        req=self.extractor.GetArchivingMode(attribute)
+        self.logger.debug("GetArchivingMode: "+str(req))

        if req[0] == "MODE_P":
            samplingPeriod = int(req[1])*10**-3
-        logger.debug("Attribute is sampled every %g seconds"%samplingPeriod)
+            self.logger.debug("Attribute is sampled every %g seconds"%samplingPeriod)

        elif req[0] == "MODE_EVT":
-        logger.warning("Attribute is archived on event. Chunks of data are sized with an estimated datarate of 0.1Hz")
+            self.logger.warning("Attribute is archived on event. Chunks of data are sized with an estimated datarate of 0.1Hz")
            samplingPeriod = 10

        else:
-        raise NotImplemented("Archive mode not implemented")
+            self.logger.error("Archive mode not implemented in this script")
+            raise NotImplementedError("Archive mode not implemented in this script")


        # Evaluate the number of points
-    est_N = (dateStop-dateStart).total_seconds()/samplingPeriod
-    logger.debug("Which leads to %d points to extract."%est_N)
+        N = (dateStop-dateStart).total_seconds()/samplingPeriod
+        self.logger.debug("Which leads to %d points to extract."%N)
+
+        return N
+
+
+
+    ##---------------------------------------------------------------------------##
+    def BetweenDates(
+            self,
+            attr,
+            dateStart,
+            dateStop=None,
+            ):
+        """
+        Query attribute data from an archiver database, get all points between dates.
+        Use ExtractBetweenDates.
+
+        Parameters
+        ----------
+        attr : String
+            Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
+
+        dateStart : datetime.datetime
+            Start date for extraction.
+
+        dateStop : datetime.datetime
+            Stop date for extraction.
+            Default is None, meaning now (datetime.datetime.now() evaluated at call time)
+
+        Exceptions
+        ----------
+        ValueError
+            The attribute is not found in the database.
+
+        Returns
+        -------
+        [date, value] : array
+            date : numpy.ndarray of datetime.datetime objects
+                Dates of the values
+            value : numpy.ndarray
+                Archived values
+        """
+        # Resolve the default stop date at call time, not when the class is defined
+        dateStop = datetime.datetime.now() if dateStop is None else dateStop
+        Nmax, DBDFMT = self.Nmax, self.DBDFMT  # class constants, used below as bare names
+        est_N = self.evalPoints(attr, dateStart, dateStop)  # check presence, estimate points

        # If data chunk is too much, we need to cut it
        if est_N > Nmax:
@@ -153,7 +237,7 @@ def query_ADB_BetweenDates(attr,
            while cdates[-1] < dateStop:
                cdates.append(cdates[-1]+dt)
            cdates[-1] = dateStop
-        logger.debug("Cutting access to %d little chunks of time, %s each."%(len(cdates)-1, dt))
+            self.logger.debug("Cutting access to %d little chunks of time, %s each."%(len(cdates)-1, dt))
        else:
            cdates=[dateStart, dateStop]

@@ -164,13 +248,13 @@ def query_ADB_BetweenDates(attr,
        # For each date chunk
        for i_d in range(len(cdates)-1):
            # Make retrieval request
-        logger.debug("Perform ExtractBetweenDates (%s, %s, %s)"%(
+            self.logger.debug("Perform ExtractBetweenDates (%s, %s, %s)"%(
                attr,
                cdates[i_d].strftime(DBDFMT),
                cdates[i_d+1].strftime(DBDFMT))
                )

-        _date, _value = ADB.ExtractBetweenDates([
+            _date, _value = self.extractor.ExtractBetweenDates([
                attr,
                cdates[i_d].strftime(DBDFMT),
                cdates[i_d+1].strftime(DBDFMT)
@@ -184,12 +268,12 @@ def query_ADB_BetweenDates(attr,
            value.append(_value)
            date.append(_date)

-    logger.debug("Concatenate chunks")
+        self.logger.debug("Concatenate chunks")
        value = np.concatenate(value)
        date = np.concatenate(date)


-    logger.debug("Extraction done for %s."%attr)
+        self.logger.debug("Extraction done for %s."%attr)
        return [date, value]

    ##---------------------------------------------------------------------------##
@@ -349,137 +433,3 @@ def query_ADB_NearestValue(attr,
        return [realdate, value]


-
-##########################################################################
-""" Command Line Interface """
-if __name__ == "__main__":
-
-    # Name the logger after the filename
-    logger = logging.getLogger("ArchiveExtractor")
-
-    # Default stop date
-    dateStop = datetime.datetime.now()
-
-    # Default stop date
-    dateStart = datetime.datetime.now()-datetime.timedelta(days=1)
-
-    #######################################################
-    # Install argument parser
-    import argparse
-
-    parser = argparse.ArgumentParser(description="Extract attributes from the extractor devices.\nVersion %s"%__version__)
-
-    parser.add_argument("--from", type=dateparse, dest="dateStart",
-        help="Start date for extraction, format '1990-12-13-22:33:45'. "+
-        "It is possible to be less precise and drop, seconds, minutes, hours or even day."+
-        " Default is one day ago",
-        default=dateStart)
-
-    parser.add_argument("--to", type=dateparse, dest="dateStop",
-        help="Stop date for extraction, format '1990-12-13-22:33:45'. It is possible to be less precise and drop, seconds, minutes, hours or even day."+
-        " Default is now.",
-        default=dateStop)
-
-    parser.add_argument("--DB", choices=["H", "T", "L"],
-        default="T", help="Database to extract from. HDB (H) or TDB (T), default: %(default)s")
-
-    parser.add_argument("--DBN", type=int, default=2,
-            help="Extractor device number, default: %(default)s")
-
-    parser.add_argument("--fileout", type=str, default="extracted_%s.npy"%datetime.datetime.now().strftime("%Y%m%d_%H%M%S"),
-            help="filename of the extraction destination. Default: %(default)s"),
-
-    parser.add_argument('--log', type=str, default="INFO",
-            help="Log level. Default: %(default)s.")
-
-
-    parser.add_argument('--filemode', action="store_true",
-            help="Set attribute to filemode."+
-            " Instead of specifying attributes, put a path to a file containing a list of attributes."+
-            " The file contains one attribute per line.")
-
-    parser.add_argument('attributes', type=str, nargs='+',
-                        help="List of attributes to extract. Full tango path.")
-
-    args = parser.parse_args()
-
-
-    #######################################################
-    # Configure logger
-
-    # Add a stream handler
-    s_handler = logging.StreamHandler()
-    s_handler.setFormatter(logging.Formatter("%(levelname)s\t[%(funcName)s] \t%(message)s"))
-
-    # Set level according to command line attribute
-    s_handler.setLevel(level=getattr(logging, args.log.upper()))
-    logger.setLevel(level=getattr(logging, args.log.upper()))
-    logger.addHandler(s_handler)
-
-    logger.debug("Parsed arguments: %s"%args)
-
-    logger.info("Archive Extractor %s"%__version__)
-
-    #######################################################
-    # Filemode or not
-    if args.filemode:
-        logger.info("Filemode, openning file %s"%args.attributes[0])
-        # Read the file. Each line is an attribute
-        with open(args.attributes[0], "r") as fp:
-            attributes = fp.readlines()
-
-        logger.debug("Read lines : %s"%attributes)
-
-        # Clean end of line
-        for i_a in range(len(attributes)):
-            attributes[i_a] = attributes[i_a].rstrip()
-
-    else:
-        attributes = args.attributes
-
-    #######################################################
-    # Select Extractor
-    if args.DB == "L":
-        extractor = "archiving/extractor/%d"%(args.DBN)
-    else:
-        extractor = "archiving/%sDBExtractor/%d"%(args.DB, args.DBN)
-
-    #######################################################
-    # Prepare dictionnary for result
-    results = dict()
-
-    #######################################################
-    # Extract from database
-    logger.info("Extract from %s to %s."%(args.dateStart, args.dateStop))
-
-    for attr in attributes:
-        logger.info("Extracting attribute %s..."%attr)
-
-        for attempt in range(3):
-            try:
-                datevalue = query_ADB_BetweenDates(attr, args.dateStart, args.dateStop, extractor)
-
-                # Add to result dictionnary
-                results[attr] = datevalue
-
-            except ValueError as e:
-                logger.debug("ErrorMsg: %s"%e)
-                logger.warning("Failed to extract %s. Skipping..."%attr)
-            except (tango.CommunicationFailed, tango.DevFailed) as e:
-                # retry
-                logger.debug("ErrorMsg: %s"%e)
-                logger.warning("Failed to extract %s. Retry..."%attr)
-            break
-
-        else:
-            logger.error("The device %s might have crash.\n"%extractor+
-                    "You should check with Jive and probably restart with Astor.\n")
-
-        # Save all at each step
-        np.save(args.fileout, results)
-
-    logger.info("Extraction done, saved in file %s"%args.fileout)
-
-else:
-    # Name the logger after the module name
-    logger = logging.getLogger(__name__)
--- a/cli_archiveextractor.py
+++ b/cli_archiveextractor.py
+import datetime
+import logging
+
+import numpy as np
+import PyTango as tango
+
+import ArchiveExtractor
+
+
+##########################################################################
+""" Command Line Interface """
+if __name__ == "__main__":
+
+    # Name the logger after the filename
+    logger = logging.getLogger("ArchiveExtractor")
+
+    # Default stop date
+    dateStop = datetime.datetime.now()
+
+    # Default start date
+    dateStart = datetime.datetime.now()-datetime.timedelta(days=1)
+
+    # The date parser is a static method of the ArchiveExtractor class
+    dateparse = ArchiveExtractor.ArchiveExtractor.dateparse
+
+    #######################################################
+    # Install argument parser
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Extract attributes from the extractor devices.\nVersion %s"%ArchiveExtractor.__version__)
+
+    parser.add_argument("--from", type=dateparse, dest="dateStart",
+        help="Start date for extraction, format '1990-12-13-22:33:45'. "+
+        "It is possible to be less precise and drop, seconds, minutes, hours or even day."+
+        " Default is one day ago",
+        default=dateStart)
+
+    parser.add_argument("--to", type=dateparse, dest="dateStop",
+        help="Stop date for extraction, format '1990-12-13-22:33:45'. It is possible to be less precise and drop, seconds, minutes, hours or even day."+
+        " Default is now.",
+        default=dateStop)
+
+    parser.add_argument("--DB", choices=["H", "T", "L"],
+        default="T", help="Database to extract from. HDB (H) or TDB (T), default: %(default)s")
+
+    parser.add_argument("--DBN", type=int, default=2,
+            help="Extractor device number, default: %(default)s")
+
+    parser.add_argument("--fileout", type=str, default="extracted_%s.npy"%datetime.datetime.now().strftime("%Y%m%d_%H%M%S"),
+            help="filename of the extraction destination. Default: %(default)s")
+
+    parser.add_argument('--log', type=str, default="INFO",
+            help="Log level. Default: %(default)s.")
+
+
+    parser.add_argument('--filemode', action="store_true",
+            help="Set attribute to filemode."+
+            " Instead of specifying attributes, put a path to a file containing a list of attributes."+
+            " The file contains one attribute per line.")
+
+    parser.add_argument('attributes', type=str, nargs='+',
+                        help="List of attributes to extract. Full tango path.")
+
+    args = parser.parse_args()
+
+
+    #######################################################
+    # Configure logger
+
+    # Add a stream handler
+    s_handler = logging.StreamHandler()
+    s_handler.setFormatter(logging.Formatter("%(levelname)s\t[%(funcName)s] \t%(message)s"))
+
+    # Set level according to command line attribute
+    s_handler.setLevel(level=getattr(logging, args.log.upper()))
+    logger.setLevel(level=getattr(logging, args.log.upper()))
+    logger.addHandler(s_handler)
+
+    logger.debug("Parsed arguments: %s"%args)
+
+    logger.info("Archive Extractor %s"%ArchiveExtractor.__version__)
+
+    #######################################################
+    # Filemode or not
+    if args.filemode:
+        logger.info("Filemode, openning file %s"%args.attributes[0])
+        # Read the file. Each line is an attribute
+        with open(args.attributes[0], "r") as fp:
+            attributes = fp.readlines()
+
+        logger.debug("Read lines : %s"%attributes)
+
+        # Clean end of line, and drop empty lines that cannot name an attribute
+        attributes = [line.rstrip() for line in attributes if line.strip()]
+
+    else:
+        attributes = args.attributes
+
+    #######################################################
+    # Select Extractor
+    if args.DB == "L":
+        extractor = "archiving/extractor/%d"%(args.DBN)
+    else:
+        extractor = "archiving/%sDBExtractor/%d"%(args.DB, args.DBN)
+
+    # Always give the full Tango path, so the "L" database is handled too
+    AE = ArchiveExtractor.ArchiveExtractor(ExtractorPath=extractor, logger=logger)
+
+    #######################################################
+    # Prepare dictionary for result
+    results = dict()
+
+    #######################################################
+    # Extract from database
+    logger.info("Extract from %s to %s."%(args.dateStart, args.dateStop))
+
+    for attr in attributes:
+        logger.info("Extracting attribute %s..."%attr)
+
+        # Up to 3 attempts: only communication failures are worth a retry
+        for attempt in range(3):
+            try:
+                datevalue = AE.BetweenDates(attr, args.dateStart, args.dateStop)
+
+            except ValueError as e:
+                # The attribute is not archived: retrying would not help
+                logger.debug("ErrorMsg: %s"%e)
+                logger.warning("Failed to extract %s. Skipping..."%attr)
+                break
+            except (tango.CommunicationFailed, tango.DevFailed) as e:
+                # retry
+                logger.debug("ErrorMsg: %s"%e)
+                logger.warning("Failed to extract %s. Retry..."%attr)
+                continue
+            else:
+                # Add to result dictionary
+                results[attr] = datevalue
+                break
+
+        else:
+            # Reached only when every attempt failed on a Tango error
+            logger.error("The device %s might have crash.\n"%extractor+
+                    "You should check with Jive and probably restart with Astor.\n")
+
+        # Save all at each step
+        np.save(args.fileout, results)
+
+    logger.info("Extraction done, saved in file %s"%args.fileout)
+
+else:
+    # Name the logger after the module name
+    logger = logging.getLogger(__name__)