# (web-scrape navigation residue "Newer" / "Older" removed — not part of the module)
import logging
import datetime
import numpy as np
import pandas as pd
import ArchiveExtractor as ae
import ArchiveExtractor.Amenities as aea
# Get the module logger
logger = logging.getLogger("ArchiveExtractor")
##########################################################################
### Module core functions ###
##########################################################################
def _extract_attribute(attribute, method, date1, date2, db):
    """
    Check that the attribute exists in the database, determine whether it is
    a scalar or a spectrum (1D/2D vector), and dispatch the extraction to the
    matching specialized function.

    Parameters
    ----------
    attribute : str
        Full Tango attribute name; lowercased before lookup.
    method : str
        Extraction method, forwarded unchanged ("nearest", "between", ...).
    date1, date2 : datetime.datetime
        Bounds of the extraction window.
    db : str
        Which database to look in, 'H' or 'T'.

    Returns
    -------
    pandas.Series or pandas.DataFrame or None
        Whatever the dispatched extractor returns (None on extractor failure).
    """
    # Uncapitalize attribute
    attribute = attribute.lower()
    aea._check_attribute(attribute, db)
    # Get info about the attribute
    # BUGFIX: 'info' was used below without ever being assigned (NameError);
    # fetch it with the module-level infoattr helper used elsewhere in this file.
    info = infoattr(attribute)
    logger.debug("Attribute information \n%s" % info)
    # Detect spectrum
    attrtype = "scalar"
    if int(info["max_dim_x"]) > 1:
        if int(info["max_dim_y"]) > 0:
            logger.warning("Attribute %s is a (%s; %s) vector. This is poorly handled by this module."%(
                attribute, info["max_dim_x"], info["max_dim_y"]))
            attrtype = "multi"
        else:
            logger.info("Attribute %s is a 1D vector, dimension = %s."%(
                attribute, info["max_dim_x"]))
            attrtype = "vector"
    # =============
    # For now we handle multi dimension the same way as scalar, which will get only the first element
    if (attrtype == "scalar") or (attrtype == "multi"):
        if info["data_type"] == '1':
            # Boolean data type, quick fix
            dtype = bool
        else:
            dtype = float
        return _extract_scalar(attribute, method, date1, date2, db, dtype)
    if attrtype == "vector":
        return _extract_vector(attribute, method, date1, date2, db)
##---------------------------------------------------------------------------##
def _extract_scalar(attribute, method, date1, date2, db, dtype):
    """
    Extract a scalar attribute from the archiver database.

    Parameters
    ----------
    attribute : str
        Full Tango attribute name (already lowercased by the caller).
    method : str
        "nearest": single value closest to date1.
        "between": all values between date1 and date2, chunked.
        "minmaxmean": not implemented for scalars.
    date1, date2 : datetime.datetime
        Bounds of the extraction window (date2 unused for "nearest").
    db : str
        Which database to look in, 'H' or 'T'.
    dtype : type
        Target python type of the values (bool or float).

    Returns
    -------
    pandas.Series or None
        Datetime-indexed values named after the attribute;
        None if the device extractor failed.

    Raises
    ------
    NotImplementedError
        If the method is not supported for scalars.
    """
    # =====================
    if method == "nearest":
        cmdreturn = aea._cmd_with_retry(ae._Extractors[{'H':0, 'T':1}[db]], "GetNearestValue", [
            attribute,
            date1.strftime(aea._DBDFMT),
            ])
        # Unpack return
        try:
            _date, _value = cmdreturn.split(';')
        except TypeError:
            logger.error("Could not extract this chunk. Check the device extractor")
            return None
        # Transform by datatype
        if dtype is bool:
            _value = _cast_bool(_value)
        # Fabricate return pandas.Series (archiver timestamps are in milliseconds)
        d = pd.Series(index=[datetime.datetime.fromtimestamp(int(_date)/1000),], data=[_value,], name=attribute)
        return d
    # =====================
    if method == "between":
        # Cut the time horizon in chunks
        cdates = aea._chunkerize(attribute, date1, date2, db)
        # Array to hold data
        data = []
        # For each date chunk
        for i_d in range(len(cdates)-1):
            cmdreturn = aea._cmd_with_retry(ae._Extractors[{'H':0, 'T':1}[db]], "ExtractBetweenDates", [
                attribute,
                cdates[i_d].strftime(aea._DBDFMT),
                cdates[i_d+1].strftime(aea._DBDFMT)
                ])
            # Unpack return
            try:
                _date, _value = cmdreturn
            except TypeError:
                logger.error("Could not extract this chunk. Check the device extractor")
                return None
            # Transform to datetime - value arrays
            if dtype is bool:
                # BUGFIX: this branch had an empty body in the original
                # (SyntaxError). Presumably each raw value must go through
                # _cast_bool as in the "nearest" branch — TODO confirm.
                _value = np.asarray([_cast_bool(v) for v in _value], dtype=bool)
            else:
                _value = np.asarray(_value, dtype=dtype)
            if len(_date) > 0:
                _date = aea._ArrayTimeStampToDatetime(_date/1000.0)
            # Fabricate return pandas.Series
            data.append(pd.Series(index=_date, data=_value, name=attribute))
        # Concatenate chunks
        return pd.concat(data)
    # ========================
    if method == "minmaxmean":
        # If we are here, the method is not implemented
        logger.error("Method {} is not implemented for scalars.".format(method))
        # BUGFIX: 'raise NotImplemented' raises a TypeError in Python 3;
        # NotImplementedError is the proper exception class.
        raise NotImplementedError
##---------------------------------------------------------------------------##
def _extract_vector(attribute, method, date1, date2, db):
    """
    Extract a 1D vector (spectrum) attribute from the archiver database.

    Parameters
    ----------
    attribute : str
        Full Tango attribute name (already lowercased by the caller).
    method : str
        "nearest": single record closest to date1 (implemented as a small
        "between dates" query with a dynamically grown window).
        "between": all records between date1 and date2, chunked.
        Other methods raise NotImplementedError.
    date1, date2 : datetime.datetime
        Bounds of the extraction window (date2 unused for "nearest").
    db : str
        Which database to look in, 'H' or 'T'.

    Returns
    -------
    pandas.DataFrame or None
        Datetime-indexed rows, one column per vector element, empty columns
        dropped; None if the device extractor failed.

    Raises
    ------
    NotImplementedError
        If the method is not supported for vectors.
    """
    # Get info about the attribute
    # BUGFIX: the original had only this comment — 'info' was referenced
    # below but never assigned (NameError). Fetch it with the module-level
    # infoattr helper used elsewhere in this file.
    info = infoattr(attribute)
    # =====================
    if method == "nearest":
        # Get nearest does not work with vector.
        # Make a between date with surounding dates.
        # Dynamically find surounding
        cnt = 0
        dt = datetime.timedelta(seconds=10)
        while cnt < 1:
            logger.debug("Seeking points in {} to {}".format(date1-dt, date1+dt))
            cnt = ae._Extractors[{'H':0, 'T':1}[db]].GetAttDataBetweenDatesCount([
                attribute,
                (date1-dt).strftime(aea._DBDFMT2),
                (date1+dt).strftime(aea._DBDFMT2)
                ])
            dt = dt*1.5
        logger.debug("Found {} points in a +- {} interval".format(cnt, str(dt/1.5)))
        # For vector, we have to use the GetAttxxx commands
        cmdreturn = aea._cmd_with_retry(ae._Extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [
            attribute,
            (date1-dt).strftime(aea._DBDFMT),
            (date1+dt).strftime(aea._DBDFMT),
            ])
        # Unpack return
        try:
            [N,], [name,] = cmdreturn
            N = int(N)
        except TypeError:
            logger.error("Could not extract this attribute. Check the device extractor")
            return None
        # Read the history
        logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
        attrHist = ae._Extractors[{'H':0, 'T':1}[db]].attribute_history(name, N)
        ae._Extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name)
        mx = min(int(info["max_dim_x"]), 2048)  # Quick fix: Crop dimension
        _value = np.empty((N, mx), dtype=float)
        _value[:] = np.nan
        _date = np.empty(N, dtype=object)
        for i_h in range(N):
            _value[i_h, :attrHist[i_h].dim_x] = attrHist[i_h].value
            _date[i_h] = attrHist[i_h].time.todatetime()
        # Seeking nearest entry
        idx = np.argmin(abs(_date-date1))
        logger.debug("Found nearest value at index {}: {}".format(idx, _date[idx]))
        # Fabricate return value, droping empty columns.
        # BUGFIX: the original built a pandas.Series and never returned it;
        # Series also has no axis=1 for dropna. Build a one-row DataFrame,
        # consistent with the "between" branch, and return it.
        d = pd.DataFrame(index=[_date[idx],], data=[_value[idx],]).dropna(axis=1, how='all')
        return d
    # =====================
    if method == "between":
        # Cut the time horizon in chunks
        cdates = aea._chunkerize(attribute, date1, date2, db)
        # Array to hold data
        data = []
        # For each date chunk
        for i_d in range(len(cdates)-1):
            cmdreturn = aea._cmd_with_retry(ae._Extractors[{'H':0, 'T':1}[db]], "GetAttDataBetweenDates", [
                attribute,
                cdates[i_d].strftime(aea._DBDFMT),
                cdates[i_d+1].strftime(aea._DBDFMT)
                ])
            # Unpack return
            try:
                [N,], [name,] = cmdreturn
                N = int(N)
            except TypeError:
                logger.error("Could not extract this attribute. Check the device extractor")
                return None
            # Read the history
            logger.debug("Retrieve history of %d values. Dynamic attribute named %s."%(N, name))
            attrHist = ae._Extractors[{'H':0, 'T':1}[db]].attribute_history(name, N)
            ae._Extractors[{'H':0, 'T':1}[db]].RemoveDynamicAttribute(name)
            # Transform to datetime - value arrays
            mx = min(int(info["max_dim_x"]), 2048)  # Quick fix: Crop dimension
            _value = np.empty((N, mx), dtype=float)
            _value[:] = np.nan
            _date = np.empty(N, dtype=object)
            for i_h in range(N):
                _value[i_h, :attrHist[i_h].dim_x] = attrHist[i_h].value
                _date[i_h] = attrHist[i_h].time.todatetime()
            # Fabricate return pandas.Series
            data.append(pd.DataFrame(index=_date, data=_value))
        # Concatenate chunks, dropping empty columns
        return pd.concat(data).dropna(axis=1, how='all')
    # ========================
    if method == "minmaxmean":
        pass
    # If we are here, the method is not implemented
    logger.error("Method {} is not implemented for vectors.".format(method))
    # BUGFIX: 'raise NotImplemented' raises a TypeError in Python 3;
    # NotImplementedError is the proper exception class.
    raise NotImplementedError
##---------------------------------------------------------------------------##
def ExtrBetweenDates_MinMaxMean(
        attribute,
        dateStart,
        dateStop=None,
        timeInterval=datetime.timedelta(seconds=60),
        db='H',
        ):
    """
    Query attribute data from an archiver database, get all points between dates.
    Use ExtractBetweenDates.

    Parameters
    ----------
    attribute : String
        Name of the attribute. Full Tango name i.e. "test/dg/panda/current".
    dateStart : datetime.datetime, string
        Start date for extraction. If string, it will be parsed.
        Example of string format %Y-%m-%d-%H:%M:%S or less precise.
    dateStop : datetime.datetime, string
        Stop date for extraction. If string, it will be parsed.
        Example of string format %Y-%m-%d-%H:%M:%S or less precise.
        Default is now (datetime.datetime.now())
    timeInterval: datetime.timedelta, string
        Time interval used to perform min,max and mean.
        Can be a string with a number and a unit in "d", "h", "m" or "s"
    db: str
        Which database to look in, 'H' or 'T'.

    Exceptions
    ----------
    ValueError
        The attribute is not found in the database.

    Returns
    -------
    pandas.DataFrame
        Indexed by the middle of each timeInterval window, with columns
        "Min", "Mean" and "Max" holding the per-window aggregates.
        (None if the module is not initialized, or if the attribute is
        not a scalar.)
    """
    # NOTE(review): _check_initialized, _check_attribute, _dateparse, infoattr
    # and _DBDFMT2 are used unqualified here, while other functions in this
    # file reach equivalents through the aea module — presumably they are
    # defined at module level outside this chunk; verify.
    if not _check_initialized():
        return
    if not db in ("H", "T"):
        raise AttributeError("Attribute db should be 'H' or 'T'")
    # Uncapitalize attribute
    attribute = attribute.lower()
    # Check attribute is in database
    _check_attribute(attribute, db=db)
    # Parse dates
    dateStart = _dateparse(dateStart)
    # NOTE(review): assumes _dateparse(None) yields "now" per the docstring — confirm.
    dateStop = _dateparse(dateStop)
    # Parse timeInterval if string
    if type(timeInterval) is str:
        try:
            # Last character is the unit; the rest is the count.
            mul = {'s':1, 'm':60, 'h':60*60, 'd':60*60*24}[timeInterval[-1]]
        except KeyError:
            logger.error("timeInterval could not be parsed")
            raise ValueError("timeInterval could not be parsed")
        timeInterval = datetime.timedelta(seconds=int(timeInterval[:-1])*mul)
    # Get info about the attribute
    info = infoattr(attribute)
    logger.debug("Attribute information \n%s"%info)
    # Detect spectrum
    attrtype = "scalar"
    if int(info["max_dim_x"]) > 1:
        # Min/max/mean aggregation only makes sense element-wise on scalars.
        logger.error("Attribute is not a scalar. Cannot perform this kind of operation.")
        return None
    # Cut data range in time chunks: cdates holds the window boundaries,
    # the last one clamped to dateStop (so the final window may be shorter).
    cdates = [dateStart]
    while cdates[-1] < dateStop:
        cdates.append(cdates[-1]+timeInterval)
    cdates[-1] = dateStop
    # Index of the result: middle of each window.
    mdates = np.asarray(cdates[:-1])+timeInterval/2
    logger.debug("Cutting time range to %d chunks of time, %s each."%(len(cdates)-1, timeInterval))
    # Prepare arrays
    value_min = np.empty(len(cdates)-1)
    value_max = np.empty(len(cdates)-1)
    value_mean = np.empty(len(cdates)-1)
    # For each time chunk, ask the extractor device for each aggregate.
    for i_d in range(len(cdates)-1):
        for func, arr in zip(
                ["Max", "Min", "Avg"],
                [value_max, value_min, value_mean],
                ):
            # Make requests
            logger.debug("Perform GetAttData%sBetweenDates (%s, %s, %s)"%(
                func,
                attribute,
                cdates[i_d].strftime(_DBDFMT2),
                cdates[i_d+1].strftime(_DBDFMT2))
                )
            _val = getattr(ae._Extractors[{'H':0, 'T':1}[db]], "GetAttData%sBetweenDates"%func)([
                attribute,
                cdates[i_d].strftime(_DBDFMT2),
                cdates[i_d+1].strftime(_DBDFMT2)
                ])
            arr[i_d] = _val
    logger.debug("Extraction done for %s."%attribute)
    return pd.DataFrame(
        index=mdates,
        data={
            "Min":value_min,
            "Mean":value_mean,
            "Max":value_max,
            },)