Draft: Resolve "Add support for new data sources"
Closes #4
Merge request reports
Activity
assigned to @ubalden
added 1 commit
- 25ea6da9 - Added documentation for the new pipeline in cli.
added 1 commit
- e1786fc3 - Release candidate with scripts and documentation.
I probably missed something, but it does not work with the light variant (error due to CloudFront not having a required file):

```
poetry run dsa-tdb-cli advanced_data_pipeline -d data/advanced_pipeline -p global -v light --loglevel INFO -n 20
```
Edited by Lucas VERNEY

```
948 logger.info(f"Extracting {tmp_zip_name}...")
949 with ZipFile(tmp_zip_name, "r") as zip_ref:
950     zip_ref.extractall(root_folder)
951 # Remove the zip file
952 os.remove(tmp_zip_name)
953 logger.info(f"Extracted {tmp_zip_name}.")
954 logger.info("Checking for missing days from the aggregates done.")
955
956 logger.info("Starting up the spark session")
957 spark = dsa_tdb.utils.spark_session_factory(
958     app_name="Advanced data factory",
959     memory_limit=memory_limit,
960     n_workers=n_workers,
961     spark_local_dir=spark_local_dir,
962 )
963 df_raw = spark.read.parquet(os.path.join(target_folder, CHUNKED_FILES_SUBFOLDER_NAME, "sor-*/part-*.parquet"))
```

This, from the README, is failing since we don't have the local chunked files in `CHUNKED_FILES_SUBFOLDER_NAME` (here `data/advanced_pipeline/DAILY_CHUNKED`). I'm not sure about the proper way to solve it, but I would:

- either make a call to preprocess right before line 963 to ensure we have all the required daily chunk files,
- or make the `os.path.join(target_folder, CHUNKED_FILES_SUBFOLDER_NAME)` path configurable as a CLI argument pointing to a directory of the user's choice, e.g. `data/tdb_data/global___light/daily_dumps_chunked` (a minimal sketch of this option follows below).
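Purely as an illustration of the second option, here is a minimal sketch of a configurable chunked-files path. It uses a standalone argparse parser for brevity (the real CLI would hang this off the existing `advanced_data_pipeline` subparser), and the flag name `--chunked-files-dir` plus the defaulting logic are assumptions, not the project's actual interface:

```python
import argparse
import os

# Hypothetical stand-in for the project's constant of the same name.
CHUNKED_FILES_SUBFOLDER_NAME = "DAILY_CHUNKED"

# Standalone parser for brevity; flag names are assumptions.
parser = argparse.ArgumentParser(prog="advanced_data_pipeline")
parser.add_argument("-d", "--data-folder", dest="target_folder", required=True)
parser.add_argument(
    "--chunked-files-dir",
    type=str,
    default=None,
    help="Folder with the daily chunked parquet files "
         "(defaults to <data-folder>/DAILY_CHUNKED).",
)

# Example invocation pointing at an already-populated chunked folder.
args = parser.parse_args(
    ["-d", "data/advanced_pipeline",
     "--chunked-files-dir", "data/tdb_data/global___light/daily_dumps_chunked"]
)

chunked_dir = args.chunked_files_dir or os.path.join(args.target_folder, CHUNKED_FILES_SUBFOLDER_NAME)
parquet_glob = os.path.join(chunked_dir, "sor-*", "part-*.parquet")
# df_raw = spark.read.parquet(parquet_glob)  # same read as line 963, now on a configurable path
```

When the flag is omitted, the current default location is used, so existing behaviour would be preserved.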
```
911         + T.ADVANCED_FILE_AGGREGATE_DATE_PATTERN.format(aggregation=T.TDB_agg_data_versions.complete)
912     ),
913     local_filename=agg_dates_file,
914     check_sha1=False,
915 )
916
917 dates_file_df = pd.read_csv(agg_dates_file)
918 days_from_agg_file = dates_file_df["date"].unique()
919
920 table["day_str"] = table["date_str"]
921 logger.debug(f"Dates from the aggregates: {days_from_agg_file}")
922 logger.debug(f"Dates from the table: {table['day_str'].values}")
923 missing_days = set(table["day_str"].values) - set(days_from_agg_file)
924 # Then remove the days that are already available locally
925 local_table_df = dsa_tdb.fetch.check_local_storage(root_folder=root_folder)
926 local_table_dates = local_table_df["date"].dt.strftime("%Y-%m-%d").values
```

Failing here for me on a clean system (no pre-existing `data/advanced_pipeline` folder) due to `local_table_df` being an empty dataframe.

Proposed fix:

```diff
diff --git a/dsa_tdb/cli.py b/dsa_tdb/cli.py
index 3532d47..1067e81 100644
--- a/dsa_tdb/cli.py
+++ b/dsa_tdb/cli.py
@@ -923,7 +923,10 @@ def create_advanced_data(
     missing_days = set(table["day_str"].values) - set(days_from_agg_file)
     # Then remove the days that are already available locally
     local_table_df = dsa_tdb.fetch.check_local_storage(root_folder=root_folder)
-    local_table_dates = local_table_df["date"].dt.strftime("%Y-%m-%d").values
+    if not local_table_df.empty:
+        local_table_dates = local_table_df["date"].dt.strftime("%Y-%m-%d").values
+    else:
+        local_table_dates = local_table_df["date"]
```
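For reference, a tiny standalone reproduction of the failure mode the guard addresses; it assumes the empty frame returned by `check_local_storage` has a `date` column that is not datetime-typed on a clean system, which would explain the reported error:

```python
import pandas as pd

# An empty frame whose "date" column never got cast to datetime64,
# as would happen when no local files exist yet (assumption).
local_table_df = pd.DataFrame({"date": []})

try:
    local_table_dates = local_table_df["date"].dt.strftime("%Y-%m-%d").values
except AttributeError as exc:
    # pandas raises "Can only use .dt accessor with datetimelike values"
    print(f"Fails on an empty, untyped column: {exc}")
    # The else-branch in the proposed diff sidesteps this by returning the
    # empty column as-is instead of going through the .dt accessor.
    local_table_dates = local_table_df["date"]
```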
changed this line in version 19 of the diff
326 332 "--loglevel", type=str, help="The logging level. [DEBUG|INFO|WARNING|ERROR|CRITICAL]", default="INFO" 327 333 ) 328 334 335 # Advanced data preparation pipeline command: 336 parserAdvancedData = subparsers.add_parser( added 1 commit
- 9731f1c1 - Added the build-local option to ignore web situation and replicate the...
added 1 commit
- 09554d80 - Better handling of the no-files-to-process case.
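Related to the commit above, a generic sketch of the kind of guard that avoids calling Spark on an empty glob; the helper name, logging, and return convention are illustrative, not the project's actual implementation:

```python
import glob
import logging
import os

logger = logging.getLogger(__name__)


def read_chunked_files(spark, chunked_dir: str):
    """Read the daily chunked parquet files, or bail out cleanly if none exist."""
    pattern = os.path.join(chunked_dir, "sor-*", "part-*.parquet")
    if not glob.glob(pattern):
        # Without this check spark.read.parquet raises an AnalysisException
        # ("Path does not exist") on a clean system.
        logger.warning("No chunked files found under %s; nothing to process.", chunked_dir)
        return None
    return spark.read.parquet(pattern)
```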
added 1 commit
- 1c0732e5 - Fixing failed download handling in advanced data and file download.
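On the failed-download side (the CloudFront error reported at the top of the thread), a generic sketch of a download helper that fails loudly on HTTP errors instead of continuing with a missing or partial file; the `requests` usage and the function name are assumptions about one possible shape of such a fix, not the project's code:

```python
import logging

import requests

logger = logging.getLogger(__name__)


def download_file(url: str, local_filename: str, timeout: int = 60) -> bool:
    """Download url to local_filename, returning False (and logging) on any HTTP error."""
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # A 403/404 from CloudFront (e.g. a file missing for the light variant) ends up here.
            response.raise_for_status()
            with open(local_filename, "wb") as fh:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    fh.write(chunk)
    except requests.RequestException as exc:
        logger.error("Download of %s failed: %s", url, exc)
        return False
    return True
```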