Code development platform for open source projects from the European Union institutions

Skip to content
Snippets Groups Projects
main.py 796 B
Newer Older
Thomas Vliagkoftis's avatar
Thomas Vliagkoftis committed
#!/usr/bin/env python

from config import Config
from utils import save_to_parquet
from db import eea_raw_data
from munch import Munch
from collections import defaultdict

import pandas as pd

# Wrap the EEA 2018 settings so they can be read with attribute access.
file_config = Munch(Config.EEA_2018)

# Column metadata table: column 0 supplies the column names handed to the
# parser, columns 1-2 are consumed as (key, dtype) pairs.
# NOTE(review): the exact layout of column_properties lives in config — confirm.
_column_properties = file_config.column_properties
_column_names = _column_properties.iloc[:, 0].values
# Keys missing from this mapping resolve to the generic `object` dtype.
_column_dtypes = defaultdict(
    lambda: object,
    _column_properties.iloc[:, 1:3].values,
)

# Chunked reader: the file is consumed 100000 rows at a time.
# header=0 together with names= replaces the file's own header row with
# the configured column names.
chunks = pd.read_csv(
    '/eos/jeodpp/data/projects/LEGENT/transfer/EEA_passenger_cars_2018_final_test.csv',
    names=_column_names,
    header=0,
    index_col=False,
    dtype=_column_dtypes,
    delimiter=file_config.delimiter,
    encoding=file_config.encoding,
    chunksize=100000,
    low_memory=False,
)

# Push every chunk of the CSV into the raw-data store, one bulk insert per
# chunk, after tagging each row with the file year and a version number.
for df in chunks:

    # A header-only CSV still yields a single empty chunk. Skip it: an empty
    # record list cannot be bulk-inserted (pymongo's insert_many raises
    # TypeError on an empty list — assumes eea_raw_data is a pymongo
    # collection; TODO confirm against db module).
    if df.empty:
        continue

    # The 'id' column from the source file is dropped before storing.
    del df['id']
    df['file_year'] = file_config.year
    df['version'] = 1.0

    eea_raw_data.insert_many(df.to_dict('records'))