#!/usr/bin/env python
from config import Config
from utils import save_to_parquet
from db import eea_raw_data
from munch import Munch
from collections import defaultdict
import pandas as pd
# Wrap the EEA_2018 settings so they can be read as attributes below.
file_config = Munch(Config.EEA_2018)

# Source CSV with the EEA 2018 passenger-car records.
csv_path = '/eos/jeodpp/data/projects/LEGENT/transfer/EEA_passenger_cars_2018_final_test.csv'

# Column 0 of column_properties supplies the column names given to the parser.
column_names = file_config.column_properties.iloc[:, 0].values

# Columns 1 and 2 of column_properties are used as (key, dtype) pairs; any
# column without an entry falls back to ``object``.
# NOTE(review): the dtype keys come from column 1 while the names come from
# column 0 -- confirm both refer to the same labels.
dtype_pairs = file_config.column_properties.iloc[:, 1:3].values
column_dtypes = defaultdict(lambda: object, dtype_pairs)

# header=0 together with ``names`` replaces the file's own header row.
# ``chunksize`` makes read_csv return an iterator of DataFrames instead of
# loading the whole file at once.
chunks = pd.read_csv(
    csv_path,
    delimiter=file_config.delimiter,
    encoding=file_config.encoding,
    header=0,
    names=column_names,
    dtype=column_dtypes,
    index_col=False,
    chunksize=100000,
    low_memory=False,
)
# Load the CSV chunk by chunk: drop the source 'id' column, tag every row with
# the file year and a version number, then bulk-insert the rows as records.
for df in chunks:
    # A header-only file makes read_csv yield one empty frame, which would
    # produce an empty records list with nothing to insert.
    # NOTE(review): eea_raw_data looks like a pymongo collection, whose
    # insert_many raises on an empty document list -- confirm.
    if df.empty:
        continue
    del df['id']
    df['file_year'] = file_config.year
    df['version'] = 1.0
    eea_raw_data.insert_many(df.to_dict('records'))