Code development platform for open source projects from the European Union institutions :large_blue_circle: EU Login authentication by SMS has been phased out. To see alternatives please check here

Skip to content
Snippets Groups Projects
Commit cea57488 authored by Thomas Vliagkoftis's avatar Thomas Vliagkoftis
Browse files

change importers

parent abf015fb
Branches
No related tags found
No related merge requests found
#!/usr/bin/env python
"""Import the EEA passenger-cars CSV dump and its column-reference mapping."""
from collections import defaultdict

import pandas as pd
from munch import Munch

from src.uds4jrc.config import Config
from src.uds4jrc.db import eea_raw_data, eea_reference

# Delimiter, encoding, year and the column-properties table for this release.
file_config = Munch(Config.EEA_2020)
def _import_data(
        csv_path='/eos/jeodpp/data/projects/LEGENT/transfer/EEA_passenger_cars_2019_final_AMI.csv',
        chunksize=100000):
    """Stream the EEA CSV into the ``eea_raw_data`` collection chunk by chunk.

    :param str csv_path: source CSV file.  NOTE(review): defaults to the *2019*
        AMI transfer file although ``file_config`` is ``EEA_2020`` — confirm
        which release this importer is meant to load.
    :param int chunksize: rows per chunk; keeps memory bounded for the big dump.
    """
    chunks = pd.read_csv(
        csv_path,
        delimiter=file_config.delimiter,
        header=0,
        # Default every column to `object`, overridden by the dtypes listed in
        # columns 1:3 of the column-properties table.
        dtype=defaultdict(
            lambda: object, file_config.column_properties.iloc[:, 1:3].values),
        names=file_config.column_properties.iloc[:, 0].values,
        chunksize=chunksize,
        encoding=file_config.encoding,
        index_col=False,
        low_memory=False
    )
    for df in chunks:
        # Drop the file's own id column; Mongo assigns its own `_id`.
        del df['id']
        # Stamp provenance fields on every record.
        df['file_year'] = file_config.year
        df['version'] = 1.0
        eea_raw_data.insert_many(df.to_dict('records'))
def _import_reference_data():
    """Insert one {db name, file name, year} mapping document per CSV column.

    Reads the first two columns of ``file_config.column_properties``
    (db property name, file property name) and bulk-inserts them into
    ``eea_reference``.
    """
    df = file_config.column_properties
    # Vectorized column access instead of per-row `iloc` in a range(len) loop.
    reference_docs = [
        {
            'db_property_name': db_name,
            'file_property_name': file_name,
            'file_year': file_config.year,
        }
        for db_name, file_name in zip(df.iloc[:, 0], df.iloc[:, 1])
    ]
    eea_reference.insert_many(reference_docs)
# Only the column-reference mapping is imported on run; the raw rows need an
# explicit `_import_data()` call.
_import_reference_data()
import pandas as pd
import numpy as np
import logging
import sys
from pandas.io.pytables import HDFStore
from boltons.setutils import IndexedSet as iset
from munch import Munch
from src.uds4jrc.config import Config
from src.uds4jrc.db import fiat_data, fiat_reference
def _import_reference_data():
    """Insert one {db name, file name} mapping document per column into ``fiat_reference``.

    NOTE(review): depends on the module-level ``file_config`` that is only
    assigned near the end of this script — valid only when called after that
    assignment (as the commented-out call at the bottom does).
    """
    df = file_config.column_properties
    # Vectorized column access instead of per-row `iloc` in a range(len) loop.
    reference_docs = [
        {'db_property_name': db_name, 'file_property_name': file_name}
        for db_name, file_name in zip(df.iloc[:, 0], df.iloc[:, 1])
    ]
    fiat_reference.insert_many(reference_docs)
def concat_levels(dfs, name=None, keys=None, **kws):
    """Concatenate *named* frames/series under an extra outer index level.

    :param list dfs: must contain *named* dataframes and series
    :param name: optional name to stamp on the result
    :param keys: outer-level labels; defaults to each input's ``.name``
    :param kws: forwarded verbatim to :func:`pandas.concat`
    """
    level_keys = keys or [frame.name for frame in dfs]
    result = pd.concat(dfs, keys=level_keys, verify_integrity=True, **kws)
    if name:
        result.name = name
    return result
def parse_driver_from_group(groups):
    """Extract the driver name (first path component) from HDF5 group paths."""
    trimmed = groups.str.lstrip('/')
    return trimmed.str.split('/', n=1).str[0]
def calc_accum_fcr(df):
    """Accumulated fuel-consumption rate: running fuel over running distance, in l/100km."""
    cum_fuel = df[c_fc].cumsum()
    cum_dist = df[c_step].cumsum()
    fcr = 100 * cum_fuel / cum_dist
    # Zero accumulated distance yields +/-inf; mask those samples to NaN.
    return fcr.replace([np.inf, -np.inf], np.nan)
def enrich_h5_trip(dfs, group=None):
    """Add group/driver categorical columns and derived fuel columns, in place.

    :param dfs: one trip's dataframe, sampled at `freq` Hz; must contain the
        raw signal columns (`c_v`, `c_ifcr_lh`, ...)
    :param group: HDF5 group name for this trip; if None, an existing
        `c_group` column is used instead
    """
    if group is None:
        groups = dfs[c_group]
    else:
        dfs[c_group] = group  # Potentially broadcasted.
        groups = dfs[c_group]
    dfs[c_group] = groups.astype('category')
    dfs[c_driver] = parse_driver_from_group(groups).astype('category')
    # Scale back some instantaneous signals according to frequency.
    #
    # NOTE(review): dividing by 3600*freq presumably converts km/h -> km/sample
    # and l/h -> l/sample — confirm the logger's units.
    dfs[c_step] = dfs[c_v] / (3600 * freq)
    dfs[c_fc] = dfs[c_ifcr_lh] / (3600 * freq)
    # Instantaneous rate [l/100km]; zero-distance samples give inf, masked to NaN.
    # Must run AFTER c_step/c_fc are assigned above.
    dfs[c_ifcr] = (100 * dfs[c_fc] / dfs[c_step]).replace([np.inf, -np.inf], np.nan)
    dfs[c_fcr] = calc_accum_fcr(dfs)
def read_df_from_groupname(hstore, gname, filter_cols=True, skip_enrich=False, n=0, totaln=0):
    """Used by mass-loading of trips, or interactively, for loading and enriching a specific trip.

    :param hstore: open :class:`HDFStore` to read from
    :param str gname: HDF5 group name of the trip to load
    :param filter_cols: when truthy, keep only `read_cols` (absent ones padded with NaN)
    :param skip_enrich: when True, do not call `enrich_h5_trip()` (for later mass-enrichment)
    :param n, totaln: progress-reporting counters only
    :return: the trip dataframe, or None when the velocity column is missing
    """
    print("\rReading %i (of %i): %s... " % (n + 1, totaln, gname), file=sys.stderr, end='')
    df = pd.read_hdf(hstore, gname)
    df.name = gname
    df.index.name = c_t
    ## Save time also as normal columns,
    #  not to lose it in subsequent `set_index()` calls.
    df[c_t] = df.index
    if c_v not in df.columns:
        log.warning('\nMissing velocity from: %s', gname)
        return None
    if filter_cols:
        col_intersection = read_cols & df.columns
        cols_not_in_df = read_cols - df.columns
        # NOTE(review): this reassignment is never used below — `df` is always
        # sliced by `col_intersection`, so passing a tuple/list of columns has
        # no effect.  Looks like dead code; confirm the intent.
        filter_cols = (filter_cols
                       if isinstance(filter_cols, (tuple, list))
                       else read_cols)
        df = df.loc[:, col_intersection]
        # Ensure every requested column exists, padding absent signals with NaN.
        for c in cols_not_in_df:
            df[c] = np.nan
    ## Normally, leave this for mass enrichment.
    #
    if not skip_enrich:
        enrich_h5_trip(df, group=gname)
    return df
## Let the `name` attribute set on trip dataframes survive pandas operations.
pd.DataFrame._metadata=['name']
log = logging.getLogger('notebook')
freq = 10 # Hz
## Raw signal columns to load from every trip; anything else is dropped.
read_cols = iset([
    'time', 'VehicleSpeedVSOSig', 'FuelConsumption_GAG', 'EngineSpeed', 'ExternalTemperature',
    'Counter', 'Slope', 'Latitudine', 'Longitudine', 'Altitudine', 'BatteryCurrentLevel'#'MaxEngineTorque', 'EngineTorque'
])
## Bind short aliases to the raw column names, in `read_cols` order.
l = [c_t, c_v, c_ifcr_lh, c_n, c_exttemp, c_count, c_sl, c_lat, c_long, c_alt, c_bat] = read_cols
## Names for the derived columns added by `enrich_h5_trip()`.
c_dist, c_fc, c_fcr, c_ifcr, c_step, c_group, = ['D [km]', 'fc [l]', 'fcr [l/100km]', 'ifcr [l/100km]', 'dstep [km]', 'h5group']
c_driver = 'driver'
c_seg = 'seg'
c_pause = 'pause'
c_segt = 'seg_t'
## Pseudonyms replacing real driver names before inserting into the DB.
drivers_map = {
    'Adrian': 'ader',
    'Andrea': 'anda',
    'ALESSANDRO': 'ales',
    'Arcidiacono': 'arko',
    'Biagio': 'biag',
    'Chiara': 'chia',
    'DIMITRIS': 'dimi',
    'ewelina': 'evel',
    'Germana_Trentadue': 'gema',
    'Heinrich': 'hein',
    'Jelica': 'jeli',
    'kostas': 'kost',
    'Laura': 'lora',
    'Marcos': 'marc',
    'Mary': 'mary',
    'Pavlovic': 'jela',
    'Pirovano': 'piro',
    'Silvio': 'silv',
    'VictorValverde': 'vict',
    'Zap': 'zapa',
}
h5tore_kws = dict(encoding='utf-8', complevel=6, complib='blosc')
#open_mode = 'a' # append (don't use 'w', will delete existing store!)
open_mode = 'r' # read-only
fname='/eos/jeodpp/data/projects/LEGENT/fiat500/fiat500x.merged.h5' # Big 705MiB (673MB)
hstore = HDFStore(fname, open_mode, **h5tore_kws)
## Load (up to) the first `ntrips` trips and enrich each one on read.
ntrips = 500
tripnames = list(hstore)[:ntrips]
dfs = [read_df_from_groupname(hstore, gname, skip_enrich=False, n=i, totaln=len(tripnames))
       for i, gname in enumerate(tripnames)]
dfs = [df for df in dfs if df is not None] # Skip small adrian which have zero velocity everywhere.
## Stack all trips under a (group, time) MultiIndex; `names` is forwarded to pd.concat.
dfs = concat_levels(dfs, names=[c_group, c_t])
dfs[c_group] = dfs[c_group].astype('category')
## Hide real driver names.
dfs[c_driver] = dfs[c_driver].map(drivers_map).astype('category')
# NOTE(review): `assert` is stripped under `python -O`; fine for a notebook,
# not for production validation.
assert not dfs[c_driver].isnull().any(), ("Forgotten drivername mappings:", dfs.loc[dfs[c_driver].isnull()])
## Rename columns to the DB property names and bulk-insert everything.
file_config = Munch(Config.FIAT_500)
dfs.columns = file_config.column_properties.iloc[:, 0].values
fiat_data.insert_many(dfs.to_dict('records'))
# _import_reference_data()
\ No newline at end of file
#!/usr/bin/env python3
from collections import defaultdict
import pandas as pd
from munch import Munch
from uds4jrc.config import Config
from uds4jrc.db import eea_raw_data
# Delimiter, encoding, year and column layout for the 2019 EEA release.
file_config = Munch(Config.EEA_2019)

# Stream the CSV in 100k-row chunks so the whole dump never sits in memory.
reader = pd.read_csv(
    '/eos/jeodpp/data/projects/LEGENT/transfer/EEA_passenger_cars_2019_final_AMI.csv',
    delimiter=file_config.delimiter,
    header=0,
    # Default every column to `object`, overridden by the dtypes in columns
    # 1:3 of the column-properties table.
    dtype=defaultdict(
        lambda: object, file_config.column_properties.iloc[:, 1:3].values),
    names=file_config.column_properties.iloc[:, 0].values,
    chunksize=100000,
    encoding=file_config.encoding,
    index_col=False,
    low_memory=False,
)
for chunk in reader:
    # Drop the file's own id column; Mongo assigns its own `_id`.
    del chunk['id']
    # Stamp provenance fields on every record before inserting.
    chunk['file_year'] = file_config.year
    chunk['version'] = 1.0
    eea_raw_data.insert_many(chunk.to_dict('records'))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment