From e1c8dcc7d9b2b6a8c58dc70cf0288c5d63bcba83 Mon Sep 17 00:00:00 2001 From: Thomas Vliagkoftis <thomas.vliagkoftis@gmail.com> Date: Tue, 5 Jul 2022 10:23:41 +0200 Subject: [PATCH] enh: README.md, environment.yml, db.py and config.py --- README.md | 14 ++- environment.yml | 2 + src/uds4jrc/config.py | 279 ++++++++++++++++++++++++++++++++++++++++++ src/uds4jrc/db.py | 8 ++ 4 files changed, 299 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 86b057d..ebdfa9f 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ preloaded in *eos-dirs* (read section below about Nextcloud mapping). * data from those C4 projects & activities: * EEA - vehicle registrations in EU, reported by MSs & packaged by EEA (raw data) * Fiat500x - campaign with OBD data from JRC "amateur" drivers - TODO + * RealWorld (Travelcard, Geco Air, Spritmonitor) + * ATCT * DICE - TODO * commercial vehicle specs - TODO * ... @@ -168,7 +170,11 @@ save_to_excel(df, Config.UEOS, 'test.xlsx') ## 3. Data -| Name | Type | Path | -|:---------|:-------------:|:---------------------------------------:| -| EEA |raw data | /eos/jeodpp/data/projects/LEGENT/eea | -| f500x |processed | /eos/jeodpp/data/projects/LEGENT/fiat500| \ No newline at end of file +| Name | Type | Path | +|:-------------|:---------:|:-------------------------------------------------------:| +| EEA | raw data | /eos/jeodpp/data/projects/LEGENT/eea | +| f500x | processed | /eos/jeodpp/data/projects/LEGENT/fiat500 | +| Geco Air | raw data | /eos/jeodpp/data/projects/LEGENT/realworld/geco | +| Travelcard | raw data | /eos/jeodpp/data/projects/LEGENT/realworld/travelcard | +| Spritmonitor | raw data | /eos/jeodpp/data/projects/LEGENT/realworld/spritmonitor | +| ATCT | raw data | /eos/jeodpp/data/projects/LEGENT/atct | \ No newline at end of file diff --git a/environment.yml b/environment.yml index 17433b5..a8ed3c6 100644 --- a/environment.yml +++ b/environment.yml @@ -18,4 +18,6 @@ dependencies: - jupyterlab - black - boltons + 
- openpyxl + - xlrd - -e . diff --git a/src/uds4jrc/config.py b/src/uds4jrc/config.py index bc50e8d..0c4c858 100755 --- a/src/uds4jrc/config.py +++ b/src/uds4jrc/config.py @@ -341,3 +341,282 @@ class Config(mysecrets.Secrets): ['fuel_consumption_rate', 'fcr [l/100km]', np.float16], ], columns=['db_names', 'names', 'coltype']) } + + GECO_2020 = { + 'year': 2020, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Geco_air_2020.xlsx', + 'encoding': 'utf-8', + 'column_properties': pd.DataFrame([ + ['id', '', np.int32], + ['commercial_name', 'Model_name ', object], + ['commercial_name_2', 'Model_name2', object], + ['commercial_name_details', 'Model_name_details', object], + ['oem', 'OEM', object], + ['fuel_mode', 'Powertrain_type', np.int32], + ['engine_capacity', 'Engine_displacement', np.int32], + ['gearbox_type', 'Gearbox_type', object], + ['engine_max_power', 'Car_power', np.int32], + ['year', 'Car_year', np.int32], + ['vehicle_segment', 'Car_segment', object], + ['co2_declared', 'CO2_approval', np.int32], + ['co2_real_world', 'CO2_estimated', np.int32], + ['mileage', 'Distance_cumulated', np.float16], + ['time', 'Time_cumulated', np.int32], + ['fuel_consumption_declared', 'FC_approval(l/100km)', np.float16], + ['fuel_consumption_real_world', 'FC_estimated(l/100km)', np.float16], + ['powetrain', 'Powetrain', object], + ], columns=['db_names', 'names', 'coltype']), + } + + GECO_2021 = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Geco_air_2021.xls', + 'encoding': 'utf-8', + 'column_properties': pd.DataFrame([ + ['id', '', np.int32], + ['trip_count', 'Trips_number ', np.int32], + ['time', 'Time_cumulated', np.float16], + ['mileage', 'Distance_cumulated', np.float16], + ['co2_real_world', 'CO2_estimated', np.float16], + ['vehicle_segment', 'Car_segment', object], + ['body_type', 'bodyType', object], + ['mass', 'emptyWeight', np.int32], + ['co2_declared', 'CO2_approval', np.int32], + ['engine_capacity', 
'Engine_displacement', np.int32], + ['fuel_type', 'Car_energy', object], + ['engine_max_power', 'Car_power', np.float16], + ['emission_standard', 'Euro_norm', object], + ['gearbox_type', 'Gearbox_type', object], + ['date', 'Car_year', object], + ['fuel_mode', 'Powertrain_type', object], + ['oem', 'OEM', object], + ['commercial_name_2', 'Model_name2', object], + ['commercial_name_details', 'Model_name_details', object], + ['commercial_name', 'Model_name ', object], + ], columns=['db_names', 'names', 'coltype']), + } + + TRAVELCARD_PETROL = { + 'year': '-', + 'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/TravelcardPetrol4JRC.xlsx', + 'file_data_category': 'petrol', + 'encoding': 'utf-8', + 'column_properties': pd.DataFrame([ + ['fuel_consumption_real_world', 'realworld FC [l/100km]', np.float16], + ['fueling_events_count', 'number of fuelings', np.int32], + ['fuel_type', 'Fuel description', object], + ['engine_max_power', 'power [kW]', np.int32], + ['fuel_consumption_declared', 'NEDC [l/100km] ', np.float16], + ['co2_real_world', 'NEDC CO2 ICE (combined)[g/km]', np.int32], + ['co2_nedc_declared', 'NEDC CO2 hybrid (weighted) [g/km]', np.int32], + ['environmental_class_ec_approval', 'Environmental class EC Approval (light)', object], + ['oem', 'OEM', object], + ['commercial_name', 'Model', object], + ['engine_capacity', 'Engine size [cc]', np.int32], + ['mass', 'empty registration mass [kg]', np.int32], + ['type_approval_number', 'Type approval number', object], + ['variant', 'Variant', object], + ['uitvoering', 'Uitvoering', object], + ['year', 'registration year', np.int32], + ], columns=['db_names', 'names', 'coltype']), + } + + TRAVELCARD_DIESEL = { + 'year': '-', + 'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/TravelcardDiesel4JRC.xlsx', + 'file_data_category': 'diesel', + 'encoding': 'utf-8', + 'column_properties': pd.DataFrame([ + ['fuel_consumption_real_world', 'realworld FC [l/100km]', 
np.float16], + ['fueling_events_count', 'number of fuelings', np.int32], + ['fuel_type', 'Fuel description', object], + ['engine_max_power', 'power [kW]', np.int32], + ['fuel_consumption_declared', 'NEDC [l/100km] ', np.float16], + ['co2_real_world', 'NEDC CO2 ICE (combined)[g/km]', np.int32], + ['co2_nedc_declared', 'NEDC CO2 hybrid (weighted) [g/km]', np.int32], + ['environmental_class_ec_approval', 'Environmental class EC Approval (light)', object], + ['oem', 'OEM', object], + ['commercial_name', 'Model', object], + ['engine_capacity', 'Engine size [cc]', np.int32], + ['mass', 'empty registration mass [kg]', np.int32], + ['type_approval_number', 'Type approval number', object], + ['variant', 'Variant', object], + ['uitvoering', 'Uitvoering', object], + ['year', 'registration year', np.int32], + ], columns=['db_names', 'names', 'coltype']), + } + + SPRITMONITOR_2020 = { + 'year': 2020, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Spritmonitor_2020.xlsx', + 'encoding': 'utf-8', + 'column_properties': pd.DataFrame([ + ['id', 'VehicleID', np.int32], + ['oem_make', 'Make', object], + ['commercial_name', 'Model', object], + ['commercial_name_2', 'User_model_name', object], + ['engine_max_power', 'Power (kW)', np.int32], + ['year', 'ConstYear', np.int32], + ['fuel_mode', 'Fueltype', object], + ['gearbox_type', 'Gearing', object], + ['country', 'Country', object], + ['fuel_consumption_declared', 'Catalog Mileage (l/100km kg/100km kWh/100km)', np.float16], + ['fuel_type', 'Tank', object], + ['winter_tires_distance', 'Trip_wintertires (km)', np.float16], + ['summer_tires_distance', 'Trip_summertires (km)', np.float16], + ['all_year_tires_distance', 'Trip_allyeartires (km)', np.float16], + ['slow_trip_distance', 'Trip_slow (km)', np.float16], + ['normal_trip_distance', 'Trip_normal (km)', np.float16], + ['fast_trip_distance', 'Trip_fast (km)', np.float16], + ['ac_on_distance', 'Trip_ac (km)', np.float16], + 
['with_trailer_distance', 'Trip_trailer (km)', np.float16], + ['motorway_distance', 'Trip_autobahn (km)', np.float16], + ['urban_distance', 'Trip_city (km)', np.float16], + ['rural_distance', 'Trip_country (km)', np.float16], + ['heating_on_distance', 'Trip_heating (km)', np.float16], + ['winter_tires_total_fuel', 'Quantity_wintertires (l kg kWh)', np.float16], + ['summer_tires_total_fuel', 'Quantity_summertires (l kg kWh)', np.float16], + ['all_year_tires_total_fuel', 'Quantity_allyeartires (l kg kWh)', np.float16], + ['slow_trip_total_fuel', 'Quantity_slow (l kg kWh)', np.float16], + ['normal_trip_total_fuel', 'Quantity_normal (l kg kWh)', np.float16], + ['fast_trip_total_fuel', 'Quantity_fast (l kg kWh)', np.float16], + ['ac_on_total_fuel', 'Quantity_ac (l kg kWh)', np.float16], + ['with_trailer_total_fuel', 'Quantity_trailer (l kg kWh)', np.float16], + ['motorway_total_fuel', 'Quantity_autobahn (l kg kWh)', np.float16], + ['urban_total_fuel', 'Quantity_city (l kg kWh)', np.float16], + ['rural_total_fuel', 'Quantity_country (l kg kWh)', np.float16], + ['heating_on_total_fuel', 'Quantity_heating (l kg kWh)', np.float16], + ['mileage', 'Trip (km)', np.float16], + ['total_fuel', 'Quantity (l kg kWh)', np.float16], + ], columns=['db_names', 'names', 'coltype']), + } + + SPRITMONITOR_2021 = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Spritmonitor_2021.xlsx', + 'encoding': 'utf-8', + 'column_properties': pd.DataFrame([ + ['id', 'VehicleID', np.int32], + ['oem_make', 'Make', object], + ['commercial_name', 'Model', object], + ['commercial_name_2', 'User_model_name', object], + ['engine_max_power', 'Power (kW)', np.int32], + ['year', 'ConstYear', np.int32], + ['fuel_mode', 'Fueltype', object], + ['gearbox_type', 'Gearing', object], + ['country', 'Country', object], + ['fuel_consumption_declared', 'Catalog Mileage (l/100km, kg/100km, kWh/100km)', np.float16], + ['fuel_type', 'Tank', object], + ['winter_tires_distance', 
'Trip_wintertires (km)', np.float16], + ['summer_tires_distance', 'Trip_summertires (km)', np.float16], + ['all_year_tires_distance', 'Trip_allyeartires (km)', np.float16], + ['slow_trip_distance', 'Trip_slow (km)', np.float16], + ['normal_trip_distance', 'Trip_normal (km)', np.float16], + ['fast_trip_distance', 'Trip_fast (km)', np.float16], + ['ac_on_distance', 'Trip_ac (km)', np.float16], + ['with_trailer_distance', 'Trip_trailer (km)', np.float16], + ['motorway_distance', 'Trip_autobahn (km)', np.float16], + ['urban_distance', 'Trip_city (km)', np.float16], + ['rural_distance', 'Trip_country (km)', np.float16], + ['heating_on_distance', 'Trip_heating (km)', np.float16], + ['winter_tires_total_fuel', 'Quantity_wintertires (l, kg, kWh)', np.float16], + ['summer_tires_total_fuel', 'Quantity_summertires (l, kg, kWh)', np.float16], + ['all_year_tires_total_fuel', 'Quantity_allyeartires (l, kg, kWh)', np.float16], + ['slow_trip_total_fuel', 'Quantity_slow (l, kg, kWh)', np.float16], + ['normal_trip_total_fuel', 'Quantity_normal (l, kg, kWh)', np.float16], + ['fast_trip_total_fuel', 'Quantity_fast (l, kg, kWh)', np.float16], + ['ac_on_total_fuel', 'Quantity_ac (l, kg, kWh)', np.float16], + ['with_trailer_total_fuel', 'Quantity_trailer (l, kg, kWh)', np.float16], + ['motorway_total_fuel', 'Quantity_autobahn (l, kg, kWh)', np.float16], + ['urban_total_fuel', 'Quantity_city (l, kg, kWh)', np.float16], + ['rural_total_fuel', 'Quantity_country (l, kg, kWh)', np.float16], + ['heating_on_total_fuel', 'Quantity_heating (l, kg, kWh)', np.float16], + ['mileage', 'Trip (km)', np.float16], + ['total_fuel', 'Quantity (l, kg, kWh)', np.float16], + ], columns=['db_names', 'names', 'coltype']), + } + + ATCT_LUXEMBURG = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/ATCT_SNCH_20210426_Luxembourg.xlsx', + 'data': 'Luxemburg', + 'encoding': 'utf-8', + 'skip_rows': 0, + 'column_properties': pd.DataFrame([ + ['atct_family', 'ATCT_Family ID', object], + 
['co2_14_celsius', 'CO2_14', np.float16], + ['co2_23_celsius', 'CO2_23', np.float16], + ['family_correction_factor', 'FCF', np.float16], + ['type', 'Type', object], + ['emission_approval_number_extension', 'Emission approval number and extension', object], + ['part_of_atct_family_1', '', object], + ['part_of_atct_family_2', '', object], + ['part_of_atct_family_3', '', object], + ], columns=['db_names', 'names', 'coltype']) + } + + ATCT_IRISH = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/20210504_Export_ATCT_data_Irish.xlsx', + 'data': 'Irish', + 'encoding': 'utf-8', + 'skip_rows': 0, + 'column_properties': pd.DataFrame([ + ['ip_family', 'corresponding IP-Family', object], + ['atct_family', 'ATCT_Family ID', object], + ['family_correction_factor', 'FCF', np.float16], + ['co2_23_celsius', 'CO2 23°C test', np.float16], + ['co2_14_celsius', 'CO2 14°C test', np.float16], + ], columns=['db_names', 'names', 'coltype']) + } + + ATCT_IDIADA = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_IDIADA.xlsx', + 'data': 'Idiada', + 'encoding': 'utf-8', + 'skip_rows': 3, + 'column_properties': pd.DataFrame([ + ['ip_family', 'Interpolation family', object], + ['emission_approval_number_extension', 'Approval number', object], + ['co2_14_celsius', 'CO2 14°C test', np.float16], + ['co2_23_celsius', 'CO2 23°C test', np.float16], + ['family_correction_factor', 'Family correction factor (FCF)', np.float16], + ], columns=['db_names', 'names', 'coltype']), + } + + INTA_COLUMNS = pd.DataFrame([ + ['ip_family', 'Tipo', object], + ['emission_approval_number_extension', 'Contraseña', object], + ['fuel_type', 'FUEL', object], + ['co2_14_celsius', 'ATCT (14ºC) MCO2 Treg', np.float16], + ['co2_23_celsius', 'Type 1 (23ºC) M CO2 23ºC', np.float16], + ['family_correction_factor', 'Family correction factor (FCF)', np.float16], + ], columns=['db_names', 'names', 'coltype']) + + ATCT_INTA_1 = { + 'year': 
2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_INTA_1.xlsx', + 'data': 'Inta_1', + 'encoding': 'utf-8', + 'skip_rows': 1, + 'column_properties': INTA_COLUMNS + } + + ATCT_INTA_2 = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_INTA_2.xlsx', + 'data': 'Inta_2', + 'encoding': 'utf-8', + 'skip_rows': 1, + 'column_properties': INTA_COLUMNS + } + + ATCT_INTA_3 = { + 'year': 2021, + 'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_INTA_3.xlsx', + 'data': 'Inta_3', + 'encoding': 'utf-8', + 'skip_rows': 1, + 'column_properties': INTA_COLUMNS + } diff --git a/src/uds4jrc/db.py b/src/uds4jrc/db.py index 36f01f8..3b1c9d1 100755 --- a/src/uds4jrc/db.py +++ b/src/uds4jrc/db.py @@ -15,10 +15,18 @@ db = client["legent_db"] # collections definitions eea_raw_data = db["eea_raw_data"] fiat_data = db["fiat_500_data"] +geco_data = db["geco_air_data"] +spritmonitor_data = db["spritmonitor_data"] +travelcard_data = db["travelcard_data"] +atct_data = db["atct_data"] # reference collections definitions eea_reference = db["eea_properties_reference"] fiat_reference = db["fiat_500_properties_reference"] +geco_reference = db["geco_air_properties_reference"] +travelcard_reference = db["travelcard_properties_reference"] +spritmonitor_reference = db["spritmonitor_properties_reference"] +atct_reference = db["atct_properties_reference"] # EEA views definitions eea_2013_flattened = db["eea_2013_flattened"] -- GitLab