# Words accepted as textual booleans.
# Keys are compared after stripping whitespace and lower-casing the input.
_STR2BOOL = {
    "true": True, "t": True,
    "false": False, "f": False,
}

# Vectorized bool map dictionary.
# NOTE: words missing from the table map to None; _cast_bool() does not rely
# on it and reports unknown words explicitly instead.
_ArrayStr2Bool = np.vectorize(_STR2BOOL.get)


def _str2bool(item):
    """
    Convert a single element to a boolean.

    Parameters:
    -----------
    item: string, bytes, or any other object
        Strings (and byte strings) are matched, case-insensitively and
        ignoring surrounding whitespace, against the words in _STR2BOOL.
        A string holding a number is True when that number is not zero.
        Any other object is converted with bool().

    Return:
        boolean

    Raise:
        ValueError: the string is neither a known word nor a number.
    """
    if isinstance(item, bytes):
        # Undecodable bytes become replacement characters and are then
        # rejected below with a readable message.
        item = item.decode("utf-8", "replace")

    if not isinstance(item, str):
        return bool(item)

    word = item.strip().lower()
    if word in _STR2BOOL:
        return _STR2BOOL[word]

    # Numeric text such as "0" or "1": true when non-zero.
    try:
        number = float(word)
    except ValueError:
        number = None

    if number is None:
        raise ValueError("Cannot interpret {!r} as a boolean".format(item))

    return number != 0


def _cast_bool(value):
    """
    Cast a value, or array of values, to boolean.
    The conversion is chosen from the data type of the input.
    Text is parsed word by word ("true", "t", "false", "f", or a number),
    anything else is converted directly by numpy.

    Parameters:
    -----------
    value: string, integer, or array of such
        value to convert.

    Return:
        boolean:
            value or array of boolean. A scalar input gives a python bool,
            an array input gives an array of the same shape.

    Raise:
        ValueError: a string element cannot be interpreted as a boolean.
    """

    # Force to array
    value = np.asarray(value)

    if value.dtype.kind in "USO":
        # Text (unicode, bytes) or generic objects.
        # Do not use astype(bool) to detect text: its handling of strings
        # depends on the numpy version (NumPy 2 release notes: every
        # non-empty string is True, so "false" would silently become True).
        logger.debug("Try to convert to boolean")
        flags = [_str2bool(item) for item in value.ravel().tolist()]
        result = np.array(flags, dtype=bool).reshape(value.shape)
    else:
        # Numbers and booleans: plain numpy truthiness.
        logger.debug("Direct conversion to boolean")
        result = value.astype(bool)

    # cast back to single value
    if result.shape == ():
        return result.item()
    return result
Check the device extractor") return None + # Transform by datatype + if dtype is bool: + _value = _cast_bool(_value) + # Fabricate return pandas.Series - d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[float(_value),], name=attribute) + d=pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[_value,], name=attribute) return d @@ -579,7 +632,7 @@ def _extract_scalar(attribute, method, date1, date2, db, dtype): # Transform to datetime - value arrays if dtype is bool: - _value = np.asarray([{"true":True, "false":False}[_a.lower()] for _a in _value]) + _value = _cast_bool(_value) else: _value = np.asarray(_value, dtype=dtype)