From e1c8dcc7d9b2b6a8c58dc70cf0288c5d63bcba83 Mon Sep 17 00:00:00 2001
From: Thomas Vliagkoftis <thomas.vliagkoftis@gmail.com>
Date: Tue, 5 Jul 2022 10:23:41 +0200
Subject: [PATCH] enh: README.md, environment.yml, db.py and config.py

---
 README.md             |  14 ++-
 environment.yml       |   2 +
 src/uds4jrc/config.py | 279 ++++++++++++++++++++++++++++++++++++++++++
 src/uds4jrc/db.py     |   8 ++
 4 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 86b057d..ebdfa9f 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,8 @@ preloaded in *eos-dirs* (read section below about Nextcloud mapping).
 * data from those C4 projects & activities:
   * EEA - vehicle registrations in EU, reported by MSs & packaged by EEA (raw data)
   * Fiat500x - campaign with OBD data from JRC "amateur" drivers - TODO
+  * RealWorld (Travelcard, Geco Air, Spritmonitor)
+  * ATCT
   * DICE - TODO
   * commercial vehicle specs - TODO
   * ...
@@ -168,7 +170,11 @@ save_to_excel(df, Config.UEOS, 'test.xlsx')
 
 ## 3. Data
 
-| Name     |      Type     |  Path                                   |
-|:---------|:-------------:|:---------------------------------------:|
-| EEA      |raw data       | /eos/jeodpp/data/projects/LEGENT/eea    |
-| f500x    |processed      | /eos/jeodpp/data/projects/LEGENT/fiat500|
\ No newline at end of file
+| Name         |   Type    |                          Path                           |
+|:-------------|:---------:|:-------------------------------------------------------:|
+| EEA          | raw data  |          /eos/jeodpp/data/projects/LEGENT/eea           |
+| f500x        | processed |        /eos/jeodpp/data/projects/LEGENT/fiat500         |
+| Geco Air     | raw data  |     /eos/jeodpp/data/projects/LEGENT/realworld/geco     |
+| Travelcard   | raw data  |  /eos/jeodpp/data/projects/LEGENT/realworld/travelcard  |
+| Spritmonitor | raw data  | /eos/jeodpp/data/projects/LEGENT/realworld/spritmonitor |
+| ATCT         | raw data  |          /eos/jeodpp/data/projects/LEGENT/atct          |
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 17433b5..a8ed3c6 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,4 +18,6 @@ dependencies:
     - jupyterlab
     - black
     - boltons
+    - openpyxl
+    - xlrd
     - -e .
diff --git a/src/uds4jrc/config.py b/src/uds4jrc/config.py
index bc50e8d..0c4c858 100755
--- a/src/uds4jrc/config.py
+++ b/src/uds4jrc/config.py
@@ -341,3 +341,282 @@ class Config(mysecrets.Secrets):
             ['fuel_consumption_rate', 'fcr [l/100km]', np.float16],
         ], columns=['db_names', 'names', 'coltype'])
     }
+
+    GECO_2020 = {  # Geco Air 2020 workbook: file location and column mapping
+        'year': 2020,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Geco_air_2020.xlsx',
+        'encoding': 'utf-8',
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['id', '', np.int32],  # NOTE(review): empty source name - confirm which sheet column feeds 'id'
+            ['commercial_name', 'Model_name ', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['commercial_name_2', 'Model_name2', object],
+            ['commercial_name_details', 'Model_name_details', object],
+            ['oem', 'OEM', object],
+            ['fuel_mode', 'Powertrain_type', np.int32],
+            ['engine_capacity', 'Engine_displacement', np.int32],
+            ['gearbox_type', 'Gearbox_type', object],
+            ['engine_max_power', 'Car_power', np.int32],
+            ['year', 'Car_year', np.int32],
+            ['vehicle_segment', 'Car_segment', object],
+            ['co2_declared', 'CO2_approval', np.int32],
+            ['co2_real_world', 'CO2_estimated', np.int32],
+            ['mileage', 'Distance_cumulated', np.float16],  # NOTE(review): float16 tops out at 65504 - confirm cumulated distance fits
+            ['time', 'Time_cumulated', np.int32],
+            ['fuel_consumption_declared', 'FC_approval(l/100km)', np.float16],
+            ['fuel_consumption_real_world', 'FC_estimated(l/100km)', np.float16],
+            ['powetrain', 'Powetrain', object],  # NOTE(review): looks like a typo of 'powertrain' - check the sheet header before renaming
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    GECO_2021 = {  # Geco Air 2021 workbook (.xls): file location and column mapping
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Geco_air_2021.xls',
+        'encoding': 'utf-8',
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['id', '', np.int32],  # NOTE(review): empty source name - confirm which sheet column feeds 'id'
+            ['trip_count', 'Trips_number ', np.int32],
+            ['time', 'Time_cumulated', np.float16],
+            ['mileage', 'Distance_cumulated', np.float16],  # NOTE(review): float16 tops out at 65504 - confirm cumulated distance fits
+            ['co2_real_world', 'CO2_estimated', np.float16],
+            ['vehicle_segment', 'Car_segment', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['body_type', 'bodyType', object],
+            ['mass', 'emptyWeight', np.int32],
+            ['co2_declared', 'CO2_approval', np.int32],
+            ['engine_capacity', 'Engine_displacement', np.int32],
+            ['fuel_type', 'Car_energy', object],
+            ['engine_max_power', 'Car_power', np.float16],
+            ['emission_standard', 'Euro_norm', object],
+            ['gearbox_type', 'Gearbox_type', object],
+            ['date', 'Car_year', object],
+            ['fuel_mode', 'Powertrain_type', object],
+            ['oem', 'OEM', object],
+            ['commercial_name_2', 'Model_name2', object],
+            ['commercial_name_details', 'Model_name_details', object],
+            ['commercial_name', 'Model_name ', object],
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    TRAVELCARD_PETROL = {  # Travelcard petrol workbook: file location and column mapping
+        'year': '-',
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/TravelcardPetrol4JRC.xlsx',
+        'file_data_category': 'petrol',
+        'encoding': 'utf-8',
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['fuel_consumption_real_world', 'realworld FC [l/100km]', np.float16],
+            ['fueling_events_count', 'number of fuelings', np.int32],
+            ['fuel_type', 'Fuel description', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['engine_max_power', 'power [kW]', np.int32],
+            ['fuel_consumption_declared', 'NEDC [l/100km] ', np.float16],
+            ['co2_real_world', 'NEDC CO2 ICE (combined)[g/km]', np.int32],  # NOTE(review): an NEDC column feeds a *_real_world field - confirm
+            ['co2_nedc_declared', 'NEDC CO2 hybrid (weighted) [g/km]', np.int32],
+            ['environmental_class_ec_approval', 'Environmental class EC Approval (light)', object],
+            ['oem', 'OEM', object],
+            ['commercial_name', 'Model', object],
+            ['engine_capacity', 'Engine size [cc]', np.int32],
+            ['mass', 'empty registration mass [kg]', np.int32],
+            ['type_approval_number', 'Type approval number', object],
+            ['variant', 'Variant', object],
+            ['uitvoering', 'Uitvoering', object],
+            ['year', 'registration year', np.int32],
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    TRAVELCARD_DIESEL = {  # Travelcard diesel workbook: file location and column mapping
+        'year': '-',
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/TravelcardDiesel4JRC.xlsx',
+        'file_data_category': 'diesel',
+        'encoding': 'utf-8',
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['fuel_consumption_real_world', 'realworld FC [l/100km]', np.float16],
+            ['fueling_events_count', 'number of fuelings', np.int32],
+            ['fuel_type', 'Fuel description', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['engine_max_power', 'power [kW]', np.int32],
+            ['fuel_consumption_declared', 'NEDC [l/100km] ', np.float16],
+            ['co2_real_world', 'NEDC CO2 ICE (combined)[g/km]', np.int32],  # NOTE(review): an NEDC column feeds a *_real_world field - confirm
+            ['co2_nedc_declared', 'NEDC CO2 hybrid (weighted) [g/km]', np.int32],
+            ['environmental_class_ec_approval', 'Environmental class EC Approval (light)', object],
+            ['oem', 'OEM', object],
+            ['commercial_name', 'Model', object],
+            ['engine_capacity', 'Engine size [cc]', np.int32],
+            ['mass', 'empty registration mass [kg]', np.int32],
+            ['type_approval_number', 'Type approval number', object],
+            ['variant', 'Variant', object],
+            ['uitvoering', 'Uitvoering', object],
+            ['year', 'registration year', np.int32],
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    SPRITMONITOR_2020 = {  # Spritmonitor 2020 workbook: file location and column mapping
+        'year': 2020,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Spritmonitor_2020.xlsx',
+        'encoding': 'utf-8',
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['id', 'VehicleID', np.int32],
+            ['oem_make', 'Make', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['commercial_name', 'Model', object],
+            ['commercial_name_2', 'User_model_name', object],
+            ['engine_max_power', 'Power (kW)', np.int32],
+            ['year', 'ConstYear', np.int32],
+            ['fuel_mode', 'Fueltype', object],
+            ['gearbox_type', 'Gearing', object],
+            ['country', 'Country', object],
+            ['fuel_consumption_declared', 'Catalog Mileage (l/100km kg/100km kWh/100km)', np.float16],
+            ['fuel_type', 'Tank', object],
+            ['winter_tires_distance', 'Trip_wintertires (km)', np.float16],
+            ['summer_tires_distance', 'Trip_summertires (km)', np.float16],
+            ['all_year_tires_distance', 'Trip_allyeartires (km)', np.float16],
+            ['slow_trip_distance', 'Trip_slow (km)', np.float16],
+            ['normal_trip_distance', 'Trip_normal (km)', np.float16],
+            ['fast_trip_distance', 'Trip_fast (km)', np.float16],
+            ['ac_on_distance', 'Trip_ac (km)', np.float16],
+            ['with_trailer_distance', 'Trip_trailer (km)', np.float16],
+            ['motorway_distance', 'Trip_autobahn (km)', np.float16],
+            ['urban_distance', 'Trip_city (km)', np.float16],
+            ['rural_distance', 'Trip_country (km)', np.float16],
+            ['heating_on_distance', 'Trip_heating (km)', np.float16],
+            ['winter_tires_total_fuel', 'Quantity_wintertires (l kg kWh)', np.float16],
+            ['summer_tires_total_fuel', 'Quantity_summertires (l kg kWh)', np.float16],
+            ['all_year_tires_total_fuel', 'Quantity_allyeartires (l kg kWh)', np.float16],
+            ['slow_trip_total_fuel', 'Quantity_slow (l kg kWh)', np.float16],
+            ['normal_trip_total_fuel', 'Quantity_normal (l kg kWh)', np.float16],
+            ['fast_trip_total_fuel', 'Quantity_fast (l kg kWh)', np.float16],
+            ['ac_on_total_fuel', 'Quantity_ac (l kg kWh)', np.float16],
+            ['with_trailer_total_fuel', 'Quantity_trailer (l kg kWh)', np.float16],
+            ['motorway_total_fuel', 'Quantity_autobahn (l kg kWh)', np.float16],
+            ['urban_total_fuel', 'Quantity_city (l kg kWh)', np.float16],
+            ['rural_total_fuel', 'Quantity_country (l kg kWh)', np.float16],
+            ['heating_on_total_fuel', 'Quantity_heating (l kg kWh)', np.float16],
+            ['mileage', 'Trip (km)', np.float16],  # NOTE(review): float16 tops out at 65504 - confirm total km fits
+            ['total_fuel', 'Quantity (l kg kWh)', np.float16],
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    SPRITMONITOR_2021 = {  # Spritmonitor 2021 workbook: file location and column mapping
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/realworld/Spritmonitor_2021.xlsx',
+        'encoding': 'utf-8',
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['id', 'VehicleID', np.int32],
+            ['oem_make', 'Make', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['commercial_name', 'Model', object],
+            ['commercial_name_2', 'User_model_name', object],
+            ['engine_max_power', 'Power (kW)', np.int32],
+            ['year', 'ConstYear', np.int32],
+            ['fuel_mode', 'Fueltype', object],
+            ['gearbox_type', 'Gearing', object],
+            ['country', 'Country', object],
+            ['fuel_consumption_declared', 'Catalog Mileage (l/100km, kg/100km, kWh/100km)', np.float16],
+            ['fuel_type', 'Tank', object],
+            ['winter_tires_distance', 'Trip_wintertires (km)', np.float16],
+            ['summer_tires_distance', 'Trip_summertires (km)', np.float16],
+            ['all_year_tires_distance', 'Trip_allyeartires (km)', np.float16],
+            ['slow_trip_distance', 'Trip_slow (km)', np.float16],
+            ['normal_trip_distance', 'Trip_normal (km)', np.float16],
+            ['fast_trip_distance', 'Trip_fast (km)', np.float16],
+            ['ac_on_distance', 'Trip_ac (km)', np.float16],
+            ['with_trailer_distance', 'Trip_trailer (km)', np.float16],
+            ['motorway_distance', 'Trip_autobahn (km)', np.float16],
+            ['urban_distance', 'Trip_city (km)', np.float16],
+            ['rural_distance', 'Trip_country (km)', np.float16],
+            ['heating_on_distance', 'Trip_heating (km)', np.float16],
+            ['winter_tires_total_fuel', 'Quantity_wintertires (l, kg, kWh)', np.float16],
+            ['summer_tires_total_fuel', 'Quantity_summertires (l, kg, kWh)', np.float16],
+            ['all_year_tires_total_fuel', 'Quantity_allyeartires (l, kg, kWh)', np.float16],
+            ['slow_trip_total_fuel', 'Quantity_slow (l, kg, kWh)', np.float16],
+            ['normal_trip_total_fuel', 'Quantity_normal (l, kg, kWh)', np.float16],
+            ['fast_trip_total_fuel', 'Quantity_fast (l, kg, kWh)', np.float16],
+            ['ac_on_total_fuel', 'Quantity_ac (l, kg, kWh)', np.float16],
+            ['with_trailer_total_fuel', 'Quantity_trailer (l, kg, kWh)', np.float16],
+            ['motorway_total_fuel', 'Quantity_autobahn (l, kg, kWh)', np.float16],
+            ['urban_total_fuel', 'Quantity_city (l, kg, kWh)', np.float16],
+            ['rural_total_fuel', 'Quantity_country (l, kg, kWh)', np.float16],
+            ['heating_on_total_fuel', 'Quantity_heating (l, kg, kWh)', np.float16],
+            ['mileage', 'Trip (km)', np.float16],  # NOTE(review): float16 tops out at 65504 - confirm total km fits
+            ['total_fuel', 'Quantity (l, kg, kWh)', np.float16],
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    ATCT_LUXEMBURG = {  # ATCT workbook tagged 'Luxemburg': file location and column mapping
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/ATCT_SNCH_20210426_Luxembourg.xlsx',
+        'data': 'Luxemburg',
+        'encoding': 'utf-8',
+        'skip_rows': 0,
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['atct_family', 'ATCT_Family ID', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['co2_14_celsius', 'CO2_14', np.float16],
+            ['co2_23_celsius', 'CO2_23', np.float16],
+            ['family_correction_factor', 'FCF', np.float16],
+            ['type', 'Type', object],
+            ['emission_approval_number_extension', 'Emission approval number and extension', object],
+            ['part_of_atct_family_1', '', object],  # NOTE(review): three rows share the empty source name - presumably matched by position; confirm
+            ['part_of_atct_family_2', '', object],
+            ['part_of_atct_family_3', '', object],
+        ], columns=['db_names', 'names', 'coltype'])
+    }
+
+    ATCT_IRISH = {  # ATCT workbook tagged 'Irish': file location and column mapping
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/20210504_Export_ATCT_data_Irish.xlsx',
+        'data': 'Irish',
+        'encoding': 'utf-8',
+        'skip_rows': 0,
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['ip_family', 'corresponding IP-Family', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['atct_family', 'ATCT_Family ID', object],
+            ['family_correction_factor', 'FCF', np.float16],
+            ['co2_23_celsius', 'CO2 23°C test', np.float16],
+            ['co2_14_celsius', 'CO2 14°C test', np.float16],
+        ], columns=['db_names', 'names', 'coltype'])
+    }
+
+    ATCT_IDIADA = {  # ATCT workbook tagged 'Idiada': file location and column mapping
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_IDIADA.xlsx',
+        'data': 'Idiada',
+        'encoding': 'utf-8',
+        'skip_rows': 3,
+        'column_properties': pd.DataFrame([  # each row: [db_names, names, coltype]
+            ['ip_family', 'Interpolation family', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+            ['emission_approval_number_extension', 'Approval number', object],
+            ['co2_14_celsius', 'CO2 14°C test', np.float16],
+            ['co2_23_celsius', 'CO2 23°C test', np.float16],
+            ['family_correction_factor', 'Family correction factor (FCF)', np.float16],
+        ], columns=['db_names', 'names', 'coltype']),
+    }
+
+    INTA_COLUMNS = pd.DataFrame([  # column mapping referenced by ATCT_INTA_1/2/3 (one shared DataFrame, not copies)
+        ['ip_family', 'Tipo', object],  # builtin object: the np.object alias was removed in NumPy 1.24
+        ['emission_approval_number_extension', 'Contraseña', object],
+        ['fuel_type', 'FUEL', object],
+        ['co2_14_celsius', 'ATCT (14ºC) MCO2 Treg', np.float16],
+        ['co2_23_celsius', 'Type 1 (23ºC) M CO2 23ºC', np.float16],
+        ['family_correction_factor', 'Family correction factor (FCF)', np.float16],
+    ], columns=['db_names', 'names', 'coltype'])
+
+    ATCT_INTA_1 = {  # first of three INTA workbooks; they differ only in 'file_path' and 'data'
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_INTA_1.xlsx',
+        'data': 'Inta_1',
+        'encoding': 'utf-8',
+        'skip_rows': 1,
+        'column_properties': INTA_COLUMNS
+    }
+
+    ATCT_INTA_2 = {
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_INTA_2.xlsx',
+        'data': 'Inta_2',
+        'encoding': 'utf-8',
+        'skip_rows': 1,
+        'column_properties': INTA_COLUMNS
+    }
+
+    ATCT_INTA_3 = {
+        'year': 2021,
+        'file_path': '/eos/jeodpp/data/projects/LEGENT/atct/e9_ATCT_INTA_3.xlsx',
+        'data': 'Inta_3',
+        'encoding': 'utf-8',
+        'skip_rows': 1,
+        'column_properties': INTA_COLUMNS
+    }
diff --git a/src/uds4jrc/db.py b/src/uds4jrc/db.py
index 36f01f8..3b1c9d1 100755
--- a/src/uds4jrc/db.py
+++ b/src/uds4jrc/db.py
@@ -15,10 +15,18 @@ db = client["legent_db"]
 # collections definitions
 eea_raw_data = db["eea_raw_data"]
 fiat_data = db["fiat_500_data"]
+geco_data = db["geco_air_data"]  # handles for the Geco Air, Spritmonitor, Travelcard and ATCT collections
+spritmonitor_data = db["spritmonitor_data"]
+travelcard_data = db["travelcard_data"]
+atct_data = db["atct_data"]
 
 # reference collections definitions
 eea_reference = db["eea_properties_reference"]
 fiat_reference = db["fiat_500_properties_reference"]
+geco_reference = db["geco_air_properties_reference"]  # one "*_properties_reference" collection per new source
+travelcard_reference = db["travelcard_properties_reference"]
+spritmonitor_reference = db["spritmonitor_properties_reference"]
+atct_reference = db["atct_properties_reference"]
 
 # EEA views definitions
 eea_2013_flattened = db["eea_2013_flattened"]
-- 
GitLab