From 1b239ddb9af1f290219b0e26fceee47becaace03 Mon Sep 17 00:00:00 2001
From: Thomas Vliagkoftis <thomas.vliagkoftis@gmail.com>
Date: Tue, 5 Jul 2022 10:24:41 +0200
Subject: [PATCH] add realworld data importers

---
 importers/gecoair_importer.py      | 68 ++++++++++++++++++++++++++++++
 importers/spritmonitor_importer.py | 63 ++++++++++++++++++++++++++++
 importers/travelcard_importer.py   | 63 ++++++++++++++++++++++++++++
 3 files changed, 194 insertions(+)
 create mode 100755 importers/gecoair_importer.py
 create mode 100755 importers/spritmonitor_importer.py
 create mode 100755 importers/travelcard_importer.py

diff --git a/importers/gecoair_importer.py b/importers/gecoair_importer.py
new file mode 100755
index 0000000..d7982c2
--- /dev/null
+++ b/importers/gecoair_importer.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+"""Import the Geco Air 2021 Excel file configured in Config."""
+
+from collections import defaultdict
+
+import pandas as pd
+from munch import Munch
+
+from src.uds4jrc.config import Config
+from src.uds4jrc.db import geco_data, geco_reference
+
+file_config = Munch(Config.GECO_2021)
+
+
+def _import_data():
+    """Load the configured Excel file and insert its rows into geco_data.
+
+    Columns are renamed to the names in the first column of
+    ``file_config.column_properties``; the ``id`` column is dropped and
+    ``file_year``, ``version`` and ``year`` are added to every record.
+    """
+    df = pd.read_excel(
+        file_config.file_path,
+        header=0,
+        # (key, dtype) pairs come from the second and third columns of
+        # column_properties (the key is what _import_reference_data calls
+        # the file property name); the defaultdict yields ``object`` for
+        # any key without an entry.
+        dtype=defaultdict(
+            lambda: object, file_config.column_properties.iloc[:, 1:3].values),
+        names=file_config.column_properties.iloc[:, 0].values,
+        index_col=False
+    )
+
+    del df['id']
+    df['file_year'] = file_config.year
+    df['version'] = 1.0
+
+    # Need to calculate the year for Geco Air 2021 data only: it is the
+    # first four characters of the textual form of ``date``. Done
+    # column-wise rather than with a per-row ``DataFrame.apply``.
+    df['year'] = df['date'].astype(str).str[:4]
+
+    geco_data.insert_many(df.to_dict('records'))
+
+
+def _import_reference_data():
+    """Insert one reference document per row of column_properties.
+
+    Each document pairs a database property name (first column) with its
+    file property name (second column) and ``file_config.year``.
+    """
+    properties = file_config.column_properties
+    reference_docs = [
+        {
+            'db_property_name': db_name,
+            'file_property_name': file_name,
+            'file_year': file_config.year
+        }
+        for db_name, file_name in zip(
+            properties.iloc[:, 0], properties.iloc[:, 1])
+    ]
+
+    geco_reference.insert_many(reference_docs)
+
+
+_import_data()
+# _import_reference_data()
diff --git a/importers/spritmonitor_importer.py b/importers/spritmonitor_importer.py
new file mode 100755
index 0000000..8ba7a8e
--- /dev/null
+++ b/importers/spritmonitor_importer.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+"""Import the Spritmonitor 2021 Excel file configured in Config."""
+
+from collections import defaultdict
+
+import pandas as pd
+from munch import Munch
+
+from src.uds4jrc.config import Config
+from src.uds4jrc.db import spritmonitor_data, spritmonitor_reference
+
+file_config = Munch(Config.SPRITMONITOR_2021)
+
+
+def _import_data():
+    """Load the configured Excel file into spritmonitor_data.
+
+    Columns are renamed to the names in the first column of
+    ``file_config.column_properties``; the ``id`` column is dropped and
+    ``file_year`` and ``version`` are added to every record.
+    """
+    df = pd.read_excel(
+        file_config.file_path,
+        header=0,
+        # (key, dtype) pairs come from the second and third columns of
+        # column_properties (the key is what _import_reference_data calls
+        # the file property name); the defaultdict yields ``object`` for
+        # any key without an entry.
+        dtype=defaultdict(
+            lambda: object, file_config.column_properties.iloc[:, 1:3].values),
+        names=file_config.column_properties.iloc[:, 0].values,
+        index_col=False
+    )
+
+    del df['id']
+    df['file_year'] = file_config.year
+    df['version'] = 1.0
+
+    spritmonitor_data.insert_many(df.to_dict('records'))
+
+
+def _import_reference_data():
+    """Insert one reference document per row of column_properties.
+
+    Each document pairs a database property name (first column) with its
+    file property name (second column) and ``file_config.year``.
+    """
+    properties = file_config.column_properties
+    reference_docs = [
+        {
+            'db_property_name': db_name,
+            'file_property_name': file_name,
+            'file_year': file_config.year
+        }
+        for db_name, file_name in zip(
+            properties.iloc[:, 0], properties.iloc[:, 1])
+    ]
+
+    spritmonitor_reference.insert_many(reference_docs)
+
+
+_import_data()
+_import_reference_data()
diff --git a/importers/travelcard_importer.py b/importers/travelcard_importer.py
new file mode 100755
index 0000000..d653ee1
--- /dev/null
+++ b/importers/travelcard_importer.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+"""Import the Travelcard diesel Excel file configured in Config."""
+
+from collections import defaultdict
+
+import pandas as pd
+from munch import Munch
+
+from src.uds4jrc.config import Config
+from src.uds4jrc.db import travelcard_data, travelcard_reference
+
+file_config = Munch(Config.TRAVELCARD_DIESEL)
+
+
+def _import_data():
+    """Load the configured Excel file into travelcard_data.
+
+    Columns are renamed to the names in the first column of
+    ``file_config.column_properties``; ``file_data_category``,
+    ``file_year`` and ``version`` are added to every record.
+    """
+    df = pd.read_excel(
+        file_config.file_path,
+        header=0,
+        # (key, dtype) pairs come from the second and third columns of
+        # column_properties (the key is what _import_reference_data calls
+        # the file property name); the defaultdict yields ``object`` for
+        # any key without an entry.
+        dtype=defaultdict(
+            lambda: object, file_config.column_properties.iloc[:, 1:3].values),
+        names=file_config.column_properties.iloc[:, 0].values,
+        index_col=False
+    )
+
+    df['file_data_category'] = file_config.file_data_category
+    df['file_year'] = file_config.year
+    df['version'] = 1.0
+
+    travelcard_data.insert_many(df.to_dict('records'))
+
+
+def _import_reference_data():
+    """Insert one reference document per row of column_properties.
+
+    Each document pairs a database property name (first column) with its
+    file property name (second column) and the file data category.
+    """
+    properties = file_config.column_properties
+    reference_docs = [
+        {
+            'db_property_name': db_name,
+            'file_property_name': file_name,
+            'file_data_category': file_config.file_data_category
+        }
+        for db_name, file_name in zip(
+            properties.iloc[:, 0], properties.iloc[:, 1])
+    ]
+
+    travelcard_reference.insert_many(reference_docs)
+
+
+_import_data()
+_import_reference_data()
-- 
GitLab