diff --git a/.sagify.json b/.sagify.json
index 2d4be0ea75a44c8f0632fa524f646c3a9da96f4f..5a539159354e37aabff667926e86a33291ee67f3 100644
--- a/.sagify.json
+++ b/.sagify.json
@@ -5,6 +5,6 @@
     "python_version": "3.8",
     "requirements_dir": "requirements.txt",
     "sagify_module_dir": "src",
-	"experiment_id": "cpv_v0.0.1",
+	"experiment_id": "cpv_v0.0.2",
     "docker_image_base_url": "python/3.8"
 }
diff --git a/ci/train.sh b/ci/train.sh
index 7d884e5dcd9ccb0007a09e80a2f1fe055eb52d39..df3294c1b6669fb6deef4f9144c738aeb1adbbc2 100644
--- a/ci/train.sh
+++ b/ci/train.sh
@@ -33,7 +33,7 @@ job_arn=$(
     --role-arn "arn:aws:iam::528719223857:role/sagemaker_notebooks" \
     --input-data-config '{"ChannelName":"training","DataSource":{"S3DataSource":{"S3DataType":"S3Prefix","S3Uri":"'"$s3_training_data_prefix"'","S3DataDistributionType":"FullyReplicated"}}}' \
     --output-data-config "S3OutputPath=$s3_output_prefix" \
-    --resource-config "InstanceType=ml.m5.large,InstanceCount=1,VolumeSizeInGB=30" \
+    --resource-config "InstanceType=ml.m5.4xlarge,InstanceCount=1,VolumeSizeInGB=30" \
     --stopping-condition "MaxRuntimeInSeconds=86400" \
     --query TrainingJobArn \
     --region eu-west-1 \
diff --git a/src/sagify_base/local_test/test_dir/input/config/hyperparameters.json b/src/sagify_base/local_test/test_dir/input/config/hyperparameters.json
index c1ebf16a9c76c4248a75851db835c4741220bf3a..cbd2c5cb1a041dc111fc50e6ab134b300523aed9 100644
--- a/src/sagify_base/local_test/test_dir/input/config/hyperparameters.json
+++ b/src/sagify_base/local_test/test_dir/input/config/hyperparameters.json
@@ -1,3 +1,5 @@
 {
-  "c_param": "15.25"
+  "c_param": "7.9165",
+  "min_df": "1",
+  "max_df": "0.2063"
 }
diff --git a/src/sagify_base/prediction/prediction.py b/src/sagify_base/prediction/prediction.py
index efc2e332e29fd8d9c577d16b5a94a14d030691fa..0f927c7eca4f201ff81212629a6f04825964b268 100644
--- a/src/sagify_base/prediction/prediction.py
+++ b/src/sagify_base/prediction/prediction.py
@@ -8,10 +8,9 @@ import re
 
 _MODEL_PATH = os.path.join('/opt/ml/', 'model')  # Path where all your model(s) live in
 
-
-ALL_CPVS = ['85', '44', '50', '80', '73', '45', '71', '79', '90', '30', '35', '33', '55', '72', '48', '38', '09',
-            '75', '66', '64', '42', '34', '60', '92', '39', '31', '98', '51', '32', '65', '77', '22', '63', '15',
-            '70', '18', '03', '24', '43', '19', '41', '37', '14', '16', '76']
+ALL_CPVS = ['85', '44', '50', '80', '73', '45', '71', '79', '90', '30', '35', '33', '55', '72', '48', '38', '09', '75',
+            '66', '64', '42', '34', '60', '92', '39', '31', '98', '51', '32', '65', '77', '22', '63', '15', '70', '18',
+            '03', '24', '43', '19', '41', '37', '14', '16', '76']
 
 CPV_MAPPING = {
     "03": "Agricultural, farming, fishing, forestry and related products",
@@ -74,8 +73,6 @@ MONTHS = [" january ", " february ", " march ", " april ", " may ", " june ", "
           " jan ", " feb ", " mar ", " apr ", " jun ", " jul ", " aug ", " sep ", " oct ", " nov ", " dec "]
 
 
-
-
 def _remove_multiple_spaces(text: str) -> str:
     return re.sub('\s+', ' ', text)
 
diff --git a/src/sagify_base/training/training.py b/src/sagify_base/training/training.py
index b679b3ebbbcccb526d7cfab7a9fec62f37759822..0822eb4de0c9f3bbdd11cf4ac55695359043724a 100644
--- a/src/sagify_base/training/training.py
+++ b/src/sagify_base/training/training.py
@@ -63,25 +63,33 @@ def train(input_data_path: str, model_save_path: str, hyperparams_path: str = No
         raise ValueError(f"No file found in {input_data_folder}")
     raw_data = [pd.read_csv(file, index_col=0, engine="python") for file in input_files]
     df = pd.concat(raw_data)
-    print(f"Full dataset contains {len(df.index)} records.")
+    print(f"Full dataset contains {len(df)} records.")
     train_df, test_df = train_test_split(df, test_size=0.1)
 
-    x_train = train_df['title_texte']
+    x_train = train_df['title_texte'].values.astype(str)
     y_train = train_df.drop(['title_texte'], axis=1)
-    print(f"Training set contains {len(train_df.index)} records.")
+    print(f"Training set contains {len(train_df)} records.")
 
-    x_test = test_df['title_texte']
+    x_test = test_df['title_texte'].values.astype(str)
     y_test = test_df.drop(['title_texte'], axis=1)
-    print(f"Test set contains {len(x_test.index)} records.")
+    print(f"Test set contains {len(x_test)} records.")
 
     # Train model
     if "c_param" in hyperparameters.keys():
         c_param = float(hyperparameters.get("c_param"))
     else:
         raise Exception(f"c_param missing from hyper parameters config")
+    if "min_df" in hyperparameters.keys():
+        min_df = int(hyperparameters.get("min_df"))
+    else:
+        raise Exception("min_df missing from hyper parameters config")
+    if "max_df" in hyperparameters.keys():
+        max_df = float(hyperparameters.get("max_df"))
+    else:
+        raise Exception("max_df missing from hyper parameters config")
 
     svc_pipeline = Pipeline([
-        ('tfidf', TfidfVectorizer(ngram_range=(1, 3))),
+        ('tfidf', TfidfVectorizer(ngram_range=(1, 3), min_df=min_df, max_df=max_df)),
         ('clf', OneVsRestClassifier(LinearSVC(max_iter=10000, C=c_param, random_state=736283))),
     ])
     svc_pipeline.fit(x_train, y_train)