From 12d7889ae49b1bf81eb7f20e482e9bb1b77ecef4 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 16:33:37 +0100
Subject: [PATCH 1/9] Add code to generate Report with DNT enabled and DNT
 disabled output

---
 src/server/runCollection.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index fda49bd..83fdb9f 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -3,6 +3,7 @@ import { Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
 import { Cookie } from "./server.js";
 import { Logger } from "winston";
+import { template } from "lodash";
 
 export interface RunCollectionArguments {
   website_url: string;
-- 
GitLab


From 9231e5b9d60b0b4c1347f6ce05051438c6549aec Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 17:15:18 +0100
Subject: [PATCH 2/9] refactor: Add more types to Collector

---
 src/server/runCollection.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index 83fdb9f..fda49bd 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -3,7 +3,6 @@ import { Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
 import { Cookie } from "./server.js";
 import { Logger } from "winston";
-import { template } from "lodash";
 
 export interface RunCollectionArguments {
   website_url: string;
-- 
GitLab


From ff01cd84a42d7e748e3ca288699be15e52106c6c Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 16:51:24 +0100
Subject: [PATCH 3/9] refactor: reporter.ts and reporterCommand.ts

---
 src/commands/collectorCommand.ts |  29 +--
 src/commands/reporterCommand.ts  | 204 +++++---------------
 src/reporter/reporter.ts         | 321 ++++++++++++++++---------------
 src/server/runCollection.ts      |  10 +-
 4 files changed, 231 insertions(+), 333 deletions(-)

diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts
index 9989154..1769bc5 100644
--- a/src/commands/collectorCommand.ts
+++ b/src/commands/collectorCommand.ts
@@ -9,7 +9,7 @@
 import { create } from "../lib/logger.js";
 import { CollectionResult, Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
-import { Reporter, ReporterArguments } from "../reporter/reporter.js";
+import { Reporter, ReporterOptions } from "../reporter/reporter.js";
 
 let collectorCommand = "collect";
 
@@ -186,7 +186,7 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> {
 
   const inspectionResult: any = inspector.run();
 
-  let reporterArgs: ReporterArguments = {
+  let reporterArgs: ReporterOptions = {
     outputPath: args.output,
     json: args.json,
     yaml: args.yaml,
@@ -197,20 +197,25 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> {
 
   const reporter = new Reporter(reporterArgs);
 
-  reporter.saveJson(
+  reporter.saveJsonToFile(
     inspectionResult.websocketLog,
     "websockets-log.json",
     false,
   );
-  reporter.saveJson(inspectionResult, "inspection.json");
-  reporter.saveYaml(inspectionResult.cookies, "cookies.yml", false);
-  reporter.saveYaml(inspectionResult.localStorage, "local-storage.yml", false);
-  reporter.saveYaml(inspectionResult.beacons, "beacons.yml", false);
-  reporter.saveYaml(inspectionResult, "inspection.yml");
-  reporter.generateHtml(inspectionResult);
-  await reporter.generateOfficeDoc(inspectionResult);
-  await reporter.convertHtmlToPdf();
-  reporter.saveSource(collectionResult.source);
+  reporter.saveJsonToFile(inspectionResult, "inspection.json");
+  reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml", false);
+  reporter.saveYamlToFile(
+    inspectionResult.localStorage,
+    "local-storage.yml",
+    false,
+  );
+  reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml", false);
+  reporter.saveYamlToFile(inspectionResult, "inspection.yml");
+  const htmlReport = reporter.generateHtmlReport(inspectionResult);
+  await reporter.saveAsOfficeDoc(inspectionResult);
+  const pdfReport = await reporter.convertHtmlToPdfInMemory(htmlReport);
+  reporter.saveFile("inspection.pdf", pdfReport);
+  reporter.saveFile("source.html", collectionResult.source);
 
   return inspectionResult;
 }
diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts
index 3bf46d6..74203fb 100644
--- a/src/commands/reporterCommand.ts
+++ b/src/commands/reporterCommand.ts
@@ -10,20 +10,8 @@
 import yaml from "js-yaml";
 import fs from "fs";
 import path from "path";
-import { marked } from "marked";
-import { markedSmartypants } from "marked-smartypants";
-import pug from "pug";
-import groupBy from "lodash/groupBy.js";
-import { spawnSync } from "node:child_process";
-import puppeteer from "puppeteer";
-import { fileURLToPath } from "url";
-import { createRequire } from "module";
 import { all as unsafe } from "js-yaml-js-types-esm";
-import HTMLtoDOCX from "html-to-docx";
-
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = path.dirname(__filename);
-const require = createRequire(import.meta.url);
+import { Reporter } from "../reporter/reporter.js";
 
 yaml.DEFAULT_SCHEMA = yaml.DEFAULT_SCHEMA.extend(unsafe);
 
@@ -87,173 +75,79 @@ export default {
 };
 
 async function runReporter(args: ParsedArgsReporter) {
-  let output = JSON.parse(fs.readFileSync(args.inspectionJsonPath, "utf8"));
-
-  let html_template =
-    args.htmlTemplate || path.join(__dirname, "../assets/template.pug");
-  let office_template =
-    args.officeTemplate ||
-    path.join(__dirname, "../assets/template-office.pug");
+  const collectionData = JSON.parse(
+    fs.readFileSync(args.inspectionJsonPath, "utf8"),
+  );
 
-  // it is surprising that https://github.com/jstransformers/jstransformer-marked picks up this object (undocumented API)
-  // source of this call: https://github.com/markedjs/marked-custom-heading-id/blob/main/src/index.js (MIT License, Copyright (c) 2021 @markedjs)
-  marked.use({
-    renderer: {
-      heading(text, level, _) {
-        // WEC patch: add \:
-        const headingIdRegex = /(?: +|^)\{#([a-z][\:\w-]*)\}(?: +|$)/i;
-        const hasId = text.match(headingIdRegex);
-        if (!hasId) {
-          // fallback to original heading renderer
-          return false;
-        }
-        return `<h${level} id="${hasId[1]}">${text.replace(headingIdRegex, "")}</h${level}>\n`;
-      },
-    },
+  const reporter = new Reporter({
+    htmlTemplate: args.htmlTemplate,
+    officeTemplate: args.officeTemplate,
+    usePandoc: args.usePandoc || false,
+    extraFiles: args.extraFiles,
   });
-  marked.use(markedSmartypants());
-
-  const make_office =
-    args.outputFile &&
-    (args.outputFile.endsWith(".docx") || args.outputFile.endsWith(".odt"));
-  const make_pdf = args.outputFile && args.outputFile.endsWith(".pdf");
 
-  let html_dump = pug.renderFile(
-    make_office ? office_template : html_template,
-    Object.assign({}, output, {
-      pretty: true,
-      basedir: path.resolve(path.join(__dirname, "../assets")), // determines root director for pug
-      // expose some libraries to pug templates
-      groupBy: groupBy,
-      marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171
-      fs: fs,
-      yaml: yaml,
-      path: path,
-      inlineCSS: fs.readFileSync(
-        require.resolve("github-markdown-css/github-markdown.css"),
-        "utf8",
-      ),
-      inspection: output,
-      extra: args.extraFiles,
-      filterOptions: { marked: {} },
-    }),
-  );
+  const htmlReport = reporter.generateHtmlReport(collectionData);
 
+  // Print to console when no output file is specified
   if (!args.outputFile) {
-    console.log(html_dump);
-    return;
-  }
-
-  if (make_office) {
-    await generateOfficeFile(
-      args.usePandoc,
-      html_dump,
-      output,
-      args.outputFile,
-    );
-    return;
-  }
-
-  if (make_pdf) {
-    await generatePdf(args.outputFile, html_dump);
+    console.log(htmlReport);
     return;
   }
 
-  fs.writeFileSync(path.join(args.outputFile), html_dump);
-}
+  const fileExtension = path.extname(args.outputFile).toLowerCase();
+  const outputDir = path.dirname(args.outputFile);
 
-async function generateOfficeFile(
-  usePandoc: boolean,
-  html_dump: string,
-  output: any,
-  outputFile?: string,
-) {
-  if (usePandoc) {
-    // console.warn("Using pandoc to generate", argv.outputFile);
-    // pandoc infers the output format from the output file name
-    let ret = spawnSync(
-      "pandoc",
-      ["-f", "html", "--number-sections", "--toc", "--output", outputFile],
-      {
-        // cwd: '.',
-        input: html_dump,
-        encoding: "utf8",
-      },
-    );
-    if (ret[2]) {
-      console.log(ret[2]);
-    }
-    return;
-  }
-  if (outputFile.endsWith(".odt")) {
-    console.error(
-      "To generate .odt, you must have pandoc installed and specify --use-pandoc.",
-    );
-    process.exit(1);
+  // Create the output directory if it doesn't exist
+  if (!fs.existsSync(outputDir)) {
+    fs.mkdirSync(outputDir, { recursive: true });
   }
 
-  // console.warn("Using NPM html-to-docx to generate", argv.outputFile);
-  const documentOptions = {
-    // decodeUnicode: true,
-    orientation: "portrait",
-    pageSize: { width: "21.0cm", height: "29.7cm" },
-    pageNumber: true,
-    // lineNumber: true,
-    // lineNumberOptions: {countBy: 5},
-    title: output.title,
-    lang: "en-UK",
-    creator: `EDPS Website Evidence Collector v${output.script.version.npm} using NPM html-to-docx`,
-  };
-
-  try {
-    let docx = await HTMLtoDOCX(html_dump, null, documentOptions, null);
-    fs.writeFileSync(path.join(outputFile), docx);
-  } catch (e) {
-    console.error(e);
+  switch (fileExtension) {
+    case ".pdf":
+      const htmlContent = reporter.generateHtmlReport(
+        collectionData,
+        undefined,
+        false,
+      );
+      const pdfBuffer = await reporter.convertHtmlToPdfInMemory(htmlContent);
+      reporter.saveFile(args.outputFile, pdfBuffer);
+      break;
+    case ".docx":
+    case ".odt":
+      await reporter.saveAsOfficeDoc(collectionData, args.outputFile);
+      break;
+    case ".html":
+      const html = reporter.generateHtmlReport(
+        collectionData,
+        undefined,
+        false,
+      );
+      reporter.saveFile(args.outputFile, html);
+      break;
+    default:
+      console.log(`File extension ${fileExtension} is not supported.`);
+      break;
   }
 }
 
-async function generatePdf(outputFile: string, html_dump: string) {
-  const browser = await puppeteer.launch({});
-  const pages = await browser.pages();
-  await pages[0].setContent(html_dump);
-  await pages[0].pdf({
-    path: path.resolve(path.join(outputFile)),
-    format: "A4",
-    printBackground: true,
-    displayHeaderFooter: true,
-    headerTemplate: `
-            <div class="page-footer" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;">
-                <div style="bottom: 5px; text-align: center;"><span class="title"></span></div>
-            </div>
-          `,
-    footerTemplate: `
-            <div class="page-header" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;">
-                <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div>
-            </div>
-          `,
-    // this is needed to prevent content from being placed over the footer
-    margin: { top: "1.5cm", bottom: "1cm" },
-  });
-  await browser.close();
-}
 function transformArgsToObject(parsingResult: any): ParsedArgsReporter {
   return {
     _: parsingResult._ as string[],
     inspectionJsonPath: parsingResult._[1] as string,
-    outputFile: parsingResult["outputFile"] as string,
-    htmlTemplate: parsingResult["htmlTemplate"] as string | undefined,
-    officeTemplate: parsingResult["officeTemplate"] as string | undefined,
-    extraFiles: parsingResult["extraFile"] as string[] | undefined,
-    usePandoc: parsingResult["usePandoc"] as boolean | undefined,
+    outputFile: parsingResult["output-file"] as string,
+    htmlTemplate: parsingResult["html-template"] as string | undefined,
+    officeTemplate: parsingResult["office-template"] as string | undefined,
+    extraFiles: parsingResult["extra-file"] as any[] | undefined,
+    usePandoc: parsingResult["use-pandoc"] as boolean | undefined,
   };
 }
+
 interface ParsedArgsReporter {
   _: (string | number)[];
   inspectionJsonPath: string;
   outputFile?: string;
   htmlTemplate?: string;
   officeTemplate?: string;
-  extraFiles?: string[];
+  extraFiles?: any[];
   usePandoc?: boolean;
 }
diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts
index e0eed4f..92ba978 100644
--- a/src/reporter/reporter.ts
+++ b/src/reporter/reporter.ts
@@ -1,4 +1,3 @@
-// jshint esversion: 8
 import fs from "fs";
 import path from "path";
 import pug from "pug";
@@ -12,6 +11,7 @@ import { markedSmartypants } from "marked-smartypants";
 
 import { fileURLToPath } from "url";
 import { createRequire } from "module";
+import { Logger } from "winston";
 
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -35,202 +35,203 @@ marked.use({
 });
 marked.use(markedSmartypants());
 
-export interface ReporterArguments {
+export interface ReporterOptions {
   outputPath?: string;
-  json: boolean;
-  yaml: boolean;
-  html: boolean;
-  pdf: boolean;
-  usePandoc: boolean;
-  "html-template"?: string;
+  json?: boolean;
+  yaml?: boolean;
+  html?: boolean;
+  pdf?: boolean;
+  usePandoc?: boolean;
+  htmlTemplate?: string;
+  officeTemplate?: string;
+  extraFiles?: any[];
 }
 
 export class Reporter {
-  constructor(private args: ReporterArguments) {}
+  private options: ReporterOptions;
+  private logger: Logger;
 
-  saveJson(data, filename, log = true) {
-    const json_dump = JSON.stringify(data, null, 2);
-
-    if (this.args.outputPath) {
-      fs.writeFileSync(path.join(this.args.outputPath, filename), json_dump);
-    }
-
-    if (log && this.args.json) {
-      console.log(json_dump);
-    }
-  }
-
-  saveYaml(data, filename, log = true) {
-    const yaml_dump = yaml.dump(data, {
-      noRefs: true,
-      replacer: function replacer(_, value) {
-        return value instanceof URL ? value.toString() : value;
-      },
-    });
-
-    if (this.args.outputPath) {
-      fs.writeFileSync(path.join(this.args.outputPath, filename), yaml_dump);
-    }
-
-    if (log && this.args.yaml) {
-      console.log(yaml_dump);
-    }
+  constructor(options: ReporterOptions = {}, logger: Logger) {
+    this.options = options;
+    this.logger = logger;
   }
 
-  readYaml(filename) {
-    return yaml.load(
-      fs.readFileSync(path.join(this.args.outputPath, filename), "utf8"),
-    );
-  }
-
-  generateHtml(
-    data,
-    filename = "inspection.html",
+  /*
+   * Takes the output of the inspector and generates a HTML report.
+   */
+  generateHtmlReport(
+    inspectionData: object,
+    outputFilename = "inspection.html",
     log = true,
-    template = "../assets/template.pug",
-    extraData?,
+    customTemplate?: string,
   ) {
-    const html_template =
-      this.args["html-template"] || path.join(__dirname, template);
-
-    const html_dump = pug.renderFile(
-      html_template,
-      Object.assign({}, data, {
-        pretty: true,
-        basedir: path.join(__dirname, "../assets"),
-        groupBy: groupBy,
-        marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171
-        fs: fs,
-        yaml: yaml,
-        path: path,
-        inlineCSS: fs.readFileSync(
-          require.resolve("github-markdown-css/github-markdown.css"),
-        ),
-        filterOptions: { marked: {} },
-        extra: extraData,
-      }),
-    );
+    const templatePath =
+      this.options.htmlTemplate ||
+      customTemplate ||
+      path.join(__dirname, "../assets/template.pug");
 
-    if (this.args.outputPath) {
-      fs.writeFileSync(path.join(this.args.outputPath, filename), html_dump);
-    }
+    const templateData = fs.readFileSync(templatePath, "utf8");
+
+    const htmlReport = this.renderHtmlReport(inspectionData, templateData);
 
-    if (log && this.args.html) {
-      console.log(html_dump);
+    this.saveFile(outputFilename, htmlReport);
+
+    if (log && this.options.html) {
+      this.logger.info(htmlReport);
     }
 
-    return html_dump;
+    return htmlReport;
   }
 
-  async convertHtmlToPdf(
-    htmlfilename = "inspection.html",
-    pdffilename = "inspection.pdf",
-  ) {
-    if (this.args.pdf && this.args.outputPath) {
-      let content = fs.readFileSync(
-        path.resolve(path.join(this.args.outputPath, htmlfilename)),
+  renderHtmlReport(inspectionData: object, template: string) {
+    const pugInputData = Object.assign({}, inspectionData, {
+      pretty: true,
+      basedir: path.join(__dirname, "../assets"),
+      groupBy: groupBy,
+      marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171
+      fs: fs,
+      yaml: yaml,
+      path: path,
+      inlineCSS: fs.readFileSync(
+        require.resolve("github-markdown-css/github-markdown.css"),
         "utf8",
-      );
-      let pdfBuffer = await this.convertHtmlToPdfInMemory(content);
-      fs.writeFileSync(
-        path.resolve(path.join(this.args.outputPath, pdffilename)),
-        pdfBuffer,
-      );
-    }
+      ),
+      inspection: inspectionData,
+      extra: this.options.extraFiles,
+      filterOptions: { marked: {} },
+    });
+
+    return pug.render(template, pugInputData);
   }
 
   async convertHtmlToPdfInMemory(htmlContent: string): Promise<Uint8Array> {
     const browser = await puppeteer.launch({});
-    const page = await browser.newPage();
-    await page.setContent(htmlContent, { waitUntil: "networkidle0" });
-    let pdfBuffer = await page.pdf({
+    const pages = await browser.pages();
+    await pages[0].setContent(htmlContent);
+    const pdfBuffer = await pages[0].pdf({
       format: "A4",
       printBackground: true,
       displayHeaderFooter: true,
       headerTemplate: `
-                      <div style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;">
-                          <div style="bottom: 5px; text-align: center;"><span class="title"></span></div>
-                      </div>`,
+            <div class="page-footer" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;">
+                <div style="bottom: 5px; text-align: center;"><span class="title"></span></div>
+            </div>
+          `,
       footerTemplate: `
-                      <div style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;">
-                          <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div>
-                      </div>`,
+            <div class="page-header" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;">
+                <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div>
+            </div>
+          `,
+      // this is needed to prevent content from being placed over the footer
       margin: { top: "1.5cm", bottom: "1cm" },
     });
     await browser.close();
     return pdfBuffer;
   }
 
-  async generateOfficeDoc(
-    data,
-    filename = "inspection.docx",
-    log = true,
-    template = "../assets/template-office.pug",
+  /*
+   * Generates either a .docx or .odt.
+   * For .docx a conversion with pandoc or with js is available
+   * For .odt only pandoc is available.
+   */
+  async saveAsOfficeDoc(
+    inspectionData: object,
+    outputFilePath = "inspection.docx",
+    customTemplate?: string,
   ) {
-    if (this.args.outputPath) {
-      const office_template =
-        this.args["office-template"] || path.join(__dirname, template);
-      const html_dump = pug.renderFile(
-        office_template,
-        Object.assign({}, data, {
-          pretty: true,
-          basedir: path.join(__dirname, "../assets"),
-          jsondir: ".", // images in the folder of the inspection.json
-          groupBy: groupBy,
-          marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171
-          fs: fs,
-          yaml: yaml,
-          path: path,
-          inlineCSS: fs.readFileSync(
-            require.resolve("github-markdown-css/github-markdown.css"),
-          ),
-          filterOptions: { marked: {} },
-        }),
+    const office_template =
+      this.options.officeTemplate ||
+      customTemplate ||
+      path.join(__dirname, "../assets/template-office.pug");
+
+    const templateData = fs.readFileSync(office_template, "utf-8");
+
+    const htmlReport = this.renderHtmlReport(inspectionData, templateData);
+
+    if (this.options.usePandoc) {
+      await this.htmlToOfficeWithPandoc(htmlReport, outputFilePath);
+      return;
+    }
+    if (outputFilePath.endsWith(".odt")) {
+      this.logger.error(
+        "To generate .odt, you must have pandoc installed and specify --use-pandoc.",
       );
+      process.exit(1);
+    }
 
-      if (this.args.usePandoc) {
-        const ret = spawnSync(
-          "pandoc",
-          ["-f", "html", "--number-sections", "--toc", "--output", filename],
-          {
-            cwd: this.args.outputPath,
-            input: html_dump,
-            encoding: "utf8",
-          },
-        );
-        if (ret[2]) {
-          console.log(ret[2]);
-        }
-      } else {
-        if (filename.endsWith(".odt")) {
-          console.error(
-            "To generate .odt, you must have pandoc installed and specify --use-pandoc.",
-          );
-          process.exit(1);
-        }
-
-        const documentOptions = {
-          orientation: "portrait",
-          pageSize: { width: "21.0cm", height: "29.7cm" },
-          pageNumber: true,
-          title: data.title,
-          lang: "en-UK",
-          creator: `EDPS Website Evidence Collector v${data.script.version.npm} using NPM html-to-docx`,
-        };
-        const fileBuffer = await HTMLtoDOCX(
-          html_dump,
-          null,
-          documentOptions,
-          null,
-        );
-        fs.writeFileSync(path.join(this.args.outputPath, filename), fileBuffer);
-      }
+    await this.htmlToDocxJavascript(inspectionData, htmlReport, outputFilePath);
+  }
+
+  private async htmlToOfficeWithPandoc(html_dump: string, outputFile: string) {
+    const ret = spawnSync(
+      "pandoc",
+      ["-f", "html", "--number-sections", "--toc", "--output", outputFile],
+      {
+        input: html_dump,
+        encoding: "utf8",
+      },
+    );
+    if (ret[2]) {
+      this.logger.info(ret[2]);
     }
   }
 
-  saveSource(source, filename = "source.html") {
-    if (this.args.outputPath) {
-      fs.writeFileSync(path.join(this.args.outputPath, filename), source);
+  private async htmlToDocxJavascript(
+    inspectionData: any,
+    htmlReport: string,
+    outputFileName: string,
+  ) {
+    const documentOptions = {
+      orientation: "portrait",
+      pageSize: { width: "21.0cm", height: "29.7cm" },
+      pageNumber: true,
+      title: inspectionData.title,
+      lang: "en-UK",
+      creator: `EDPS Website Evidence Collector v${inspectionData.script.version.npm} using NPM html-to-docx`,
+    };
+
+    try {
+      let docx = await HTMLtoDOCX(htmlReport, null, documentOptions, null);
+      this.saveFile(outputFileName, docx);
+    } catch (e) {
+      this.logger.error(e);
+    }
+  }
+
+  saveJsonToFile(data: any, filename: string, log = true) {
+    const json_dump = JSON.stringify(data, null, 2);
+
+    this.saveFile(filename, json_dump);
+
+    if (log && this.options.json) {
+      this.logger.info(json_dump);
+    }
+  }
+
+  saveYamlToFile(data: any, filename: string, log = true) {
+    const yaml_dump = yaml.dump(data, {
+      noRefs: true,
+      replacer: function replacer(_, value) {
+        return value instanceof URL ? value.toString() : value;
+      },
+    });
+
+    this.saveFile(filename, yaml_dump);
+
+    if (log && this.options.yaml) {
+      this.logger.info(yaml_dump);
+    }
+  }
+
+  loadYamlFile(filename: string) {
+    return yaml.load(
+      fs.readFileSync(path.join(this.options.outputPath, filename), "utf8"),
+    );
+  }
+
+  saveFile(filename: string, data: any) {
+    if (this.options.outputPath) {
+      fs.writeFileSync(path.join(this.options.outputPath, filename), data);
     }
   }
 }
diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index fda49bd..2ee1297 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -1,4 +1,4 @@
-import { Reporter, ReporterArguments } from "../reporter/reporter.js";
+import { Reporter, ReporterOptions } from "../reporter/reporter.js";
 import { Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
 import { Cookie } from "./server.js";
@@ -38,8 +38,8 @@ export async function runCollection(
   return inspector.run();
 }
 
-export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) {
-  let reporterArgs: ReporterArguments = {
+export async function generateHtmlAndPdf(inspectionOutput: object) {
+  let reporterArgs: ReporterOptions = {
     html: true,
     pdf: true,
     json: false,
@@ -49,12 +49,10 @@ export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) {
   };
 
   const reporter = new Reporter(reporterArgs);
-  let html = reporter.generateHtml(
+  let html = reporter.generateHtmlReport(
     inspectionOutput,
     "inspection.html",
     false,
-    extraOuptut ? "path/to/alternative/template" : undefined,
-    extraOuptut,
   );
   let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html);
   return {
-- 
GitLab


From db34a82332bfc2ba824e9061c16f14bbfed90e02 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Fri, 21 Mar 2025 16:46:06 +0100
Subject: [PATCH 4/9] fix: properly call reporter with logger

---
 src/commands/collectorCommand.ts |  2 +-
 src/commands/reporterCommand.ts  | 22 ++++++++++++++--------
 src/server/runCollection.ts      |  7 +++++--
 src/server/server.ts             |  5 ++++-
 4 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts
index 1769bc5..cfc98e3 100644
--- a/src/commands/collectorCommand.ts
+++ b/src/commands/collectorCommand.ts
@@ -195,7 +195,7 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> {
     usePandoc: args.usePandoc,
   };
 
-  const reporter = new Reporter(reporterArgs);
+  const reporter = new Reporter(reporterArgs, logger);
 
   reporter.saveJsonToFile(
     inspectionResult.websocketLog,
diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts
index 74203fb..dedde67 100644
--- a/src/commands/reporterCommand.ts
+++ b/src/commands/reporterCommand.ts
@@ -12,6 +12,7 @@ import fs from "fs";
 import path from "path";
 import { all as unsafe } from "js-yaml-js-types-esm";
 import { Reporter } from "../reporter/reporter.js";
+import { create } from "../lib/logger.js";
 
 yaml.DEFAULT_SCHEMA = yaml.DEFAULT_SCHEMA.extend(unsafe);
 
@@ -75,22 +76,27 @@ export default {
 };
 
 async function runReporter(args: ParsedArgsReporter) {
+  const logger = create({});
+
   const collectionData = JSON.parse(
     fs.readFileSync(args.inspectionJsonPath, "utf8"),
   );
 
-  const reporter = new Reporter({
-    htmlTemplate: args.htmlTemplate,
-    officeTemplate: args.officeTemplate,
-    usePandoc: args.usePandoc || false,
-    extraFiles: args.extraFiles,
-  });
+  const reporter = new Reporter(
+    {
+      htmlTemplate: args.htmlTemplate,
+      officeTemplate: args.officeTemplate,
+      usePandoc: args.usePandoc || false,
+      extraFiles: args.extraFiles,
+    },
+    logger,
+  );
 
   const htmlReport = reporter.generateHtmlReport(collectionData);
 
   // Print to console when no output file is specified
   if (!args.outputFile) {
-    console.log(htmlReport);
+    logger.info(htmlReport);
     return;
   }
 
@@ -125,7 +131,7 @@ async function runReporter(args: ParsedArgsReporter) {
       reporter.saveFile(args.outputFile, html);
       break;
     default:
-      console.log(`File extension ${fileExtension} is not supported.`);
+      logger.warn(`File extension ${fileExtension} is not supported.`);
       break;
   }
 }
diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index 2ee1297..7e5a02b 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -38,7 +38,10 @@ export async function runCollection(
   return inspector.run();
 }
 
-export async function generateHtmlAndPdf(inspectionOutput: object) {
+export async function generateHtmlAndPdf(
+  inspectionOutput: object,
+  logger: Logger,
+) {
   let reporterArgs: ReporterOptions = {
     html: true,
     pdf: true,
@@ -48,7 +51,7 @@ export async function generateHtmlAndPdf(inspectionOutput: object) {
     yaml: false,
   };
 
-  const reporter = new Reporter(reporterArgs);
+  const reporter = new Reporter(reporterArgs, logger);
   let html = reporter.generateHtmlReport(
     inspectionOutput,
     "inspection.html",
diff --git a/src/server/server.ts b/src/server/server.ts
index 5dc11a2..6464404 100644
--- a/src/server/server.ts
+++ b/src/server/server.ts
@@ -121,7 +121,10 @@ function configureRoutes(browser_options: any[]): Router {
           requestLogger,
         );
 
-        let htmlAndPdf = await generateHtmlAndPdf(collectionOutput);
+        let htmlAndPdf = await generateHtmlAndPdf(
+          collectionOutput,
+          requestLogger,
+        );
         res.send(htmlAndPdf);
         requestLogger.info("Finished serving request");
       } catch (e: any) {
-- 
GitLab


From 8c0ebda3d423dedafebcd1ad00189b73d2af574b Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Fri, 21 Mar 2025 17:01:02 +0100
Subject: [PATCH 5/9] refactor: Use paths again for rendering pug files, as
 using the template as a loaded string requires specifying the filename anyway

---
 src/reporter/reporter.ts | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts
index 92ba978..85c30d7 100644
--- a/src/reporter/reporter.ts
+++ b/src/reporter/reporter.ts
@@ -70,9 +70,7 @@ export class Reporter {
       customTemplate ||
       path.join(__dirname, "../assets/template.pug");
 
-    const templateData = fs.readFileSync(templatePath, "utf8");
-
-    const htmlReport = this.renderHtmlReport(inspectionData, templateData);
+    const htmlReport = this.renderHtmlReport(inspectionData, templatePath);
 
     this.saveFile(outputFilename, htmlReport);
 
@@ -83,10 +81,10 @@ export class Reporter {
     return htmlReport;
   }
 
-  renderHtmlReport(inspectionData: object, template: string) {
+  renderHtmlReport(inspectionData: object, templatePath: string) {
     const pugInputData = Object.assign({}, inspectionData, {
       pretty: true,
-      basedir: path.join(__dirname, "../assets"),
+      basedir: path.resolve(path.join(__dirname, "../assets")),
       groupBy: groupBy,
       marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171
       fs: fs,
@@ -101,7 +99,7 @@ export class Reporter {
       filterOptions: { marked: {} },
     });
 
-    return pug.render(template, pugInputData);
+    return pug.renderFile(templatePath, pugInputData);
   }
 
   async convertHtmlToPdfInMemory(htmlContent: string): Promise<Uint8Array> {
@@ -139,14 +137,15 @@ export class Reporter {
     outputFilePath = "inspection.docx",
     customTemplate?: string,
   ) {
-    const office_template =
+    const officeTemplatePath =
       this.options.officeTemplate ||
       customTemplate ||
       path.join(__dirname, "../assets/template-office.pug");
 
-    const templateData = fs.readFileSync(office_template, "utf-8");
-
-    const htmlReport = this.renderHtmlReport(inspectionData, templateData);
+    const htmlReport = this.renderHtmlReport(
+      inspectionData,
+      officeTemplatePath,
+    );
 
     if (this.options.usePandoc) {
       await this.htmlToOfficeWithPandoc(htmlReport, outputFilePath);
-- 
GitLab


From 46165cd0fa79c5d42ee68b3fc1a219be08868a9c Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Fri, 21 Mar 2025 17:45:28 +0100
Subject: [PATCH 6/9] refactor: Let only ReporterOptions decide what is
 printed to console during generation.

---
 src/commands/collectorCommand.ts | 23 ++++++++---------------
 src/commands/reporterCommand.ts  | 18 +++++++-----------
 src/reporter/reporter.ts         | 25 ++++++++++++-------------
 src/server/runCollection.ts      | 13 +++++--------
 4 files changed, 32 insertions(+), 47 deletions(-)

diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts
index cfc98e3..5fe194c 100644
--- a/src/commands/collectorCommand.ts
+++ b/src/commands/collectorCommand.ts
@@ -188,28 +188,21 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> {
 
   let reporterArgs: ReporterOptions = {
     outputPath: args.output,
-    json: args.json,
-    yaml: args.yaml,
-    html: args.html,
+    printJsonToConsole: args.json,
+    printYamlToConsole: args.yaml,
+    printHtmlToConsole: args.html,
     pdf: args.pdf,
     usePandoc: args.usePandoc,
+    extraFiles: [],
   };
 
   const reporter = new Reporter(reporterArgs, logger);
 
-  reporter.saveJsonToFile(
-    inspectionResult.websocketLog,
-    "websockets-log.json",
-    false,
-  );
+  reporter.saveJsonToFile(inspectionResult.websocketLog, "websockets-log.json");
   reporter.saveJsonToFile(inspectionResult, "inspection.json");
-  reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml", false);
-  reporter.saveYamlToFile(
-    inspectionResult.localStorage,
-    "local-storage.yml",
-    false,
-  );
-  reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml", false);
+  reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml");
+  reporter.saveYamlToFile(inspectionResult.localStorage, "local-storage.yml");
+  reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml");
   reporter.saveYamlToFile(inspectionResult, "inspection.yml");
   const htmlReport = reporter.generateHtmlReport(inspectionResult);
   await reporter.saveAsOfficeDoc(inspectionResult);
diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts
index dedde67..4a81558 100644
--- a/src/commands/reporterCommand.ts
+++ b/src/commands/reporterCommand.ts
@@ -88,6 +88,11 @@ async function runReporter(args: ParsedArgsReporter) {
       officeTemplate: args.officeTemplate,
       usePandoc: args.usePandoc || false,
       extraFiles: args.extraFiles,
+      printHtmlToConsole: !args.outputFile,
+      printJsonToConsole: false,
+      pdf: false,
+      printYamlToConsole: false,
+      outputPath: ".",
     },
     logger,
   );
@@ -110,11 +115,7 @@ async function runReporter(args: ParsedArgsReporter) {
 
   switch (fileExtension) {
     case ".pdf":
-      const htmlContent = reporter.generateHtmlReport(
-        collectionData,
-        undefined,
-        false,
-      );
+      const htmlContent = reporter.generateHtmlReport(collectionData);
       const pdfBuffer = await reporter.convertHtmlToPdfInMemory(htmlContent);
       reporter.saveFile(args.outputFile, pdfBuffer);
       break;
@@ -123,12 +124,7 @@ async function runReporter(args: ParsedArgsReporter) {
       await reporter.saveAsOfficeDoc(collectionData, args.outputFile);
       break;
     case ".html":
-      const html = reporter.generateHtmlReport(
-        collectionData,
-        undefined,
-        false,
-      );
-      reporter.saveFile(args.outputFile, html);
+      reporter.generateHtmlReport(collectionData, args.outputFile);
       break;
     default:
       logger.warn(`File extension ${fileExtension} is not supported.`);
diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts
index 85c30d7..fcb452d 100644
--- a/src/reporter/reporter.ts
+++ b/src/reporter/reporter.ts
@@ -37,21 +37,21 @@ marked.use(markedSmartypants());
 
 export interface ReporterOptions {
   outputPath?: string;
-  json?: boolean;
-  yaml?: boolean;
-  html?: boolean;
-  pdf?: boolean;
-  usePandoc?: boolean;
+  printJsonToConsole: boolean;
+  printYamlToConsole: boolean;
+  printHtmlToConsole: boolean;
+  pdf: boolean;
+  usePandoc: boolean;
   htmlTemplate?: string;
   officeTemplate?: string;
-  extraFiles?: any[];
+  extraFiles: string[];
 }
 
 export class Reporter {
   private options: ReporterOptions;
   private logger: Logger;
 
-  constructor(options: ReporterOptions = {}, logger: Logger) {
+  constructor(options: ReporterOptions, logger: Logger) {
     this.options = options;
     this.logger = logger;
   }
@@ -62,7 +62,6 @@ export class Reporter {
   generateHtmlReport(
     inspectionData: object,
     outputFilename = "inspection.html",
-    log = true,
     customTemplate?: string,
   ) {
     const templatePath =
@@ -74,7 +73,7 @@ export class Reporter {
 
     this.saveFile(outputFilename, htmlReport);
 
-    if (log && this.options.html) {
+    if (this.options.printHtmlToConsole) {
       this.logger.info(htmlReport);
     }
 
@@ -197,17 +196,17 @@ export class Reporter {
     }
   }
 
-  saveJsonToFile(data: any, filename: string, log = true) {
+  saveJsonToFile(data: any, filename: string) {
     const json_dump = JSON.stringify(data, null, 2);
 
     this.saveFile(filename, json_dump);
 
-    if (log && this.options.json) {
+    if (this.options.printJsonToConsole) {
       this.logger.info(json_dump);
     }
   }
 
-  saveYamlToFile(data: any, filename: string, log = true) {
+  saveYamlToFile(data: any, filename: string) {
     const yaml_dump = yaml.dump(data, {
       noRefs: true,
       replacer: function replacer(_, value) {
@@ -217,7 +216,7 @@ export class Reporter {
 
     this.saveFile(filename, yaml_dump);
 
-    if (log && this.options.yaml) {
+    if (this.options.printYamlToConsole) {
       this.logger.info(yaml_dump);
     }
   }
diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index 7e5a02b..406b359 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -43,20 +43,17 @@ export async function generateHtmlAndPdf(
   logger: Logger,
 ) {
   let reporterArgs: ReporterOptions = {
-    html: true,
+    printHtmlToConsole: false,
     pdf: true,
-    json: false,
+    printJsonToConsole: false,
     outputPath: undefined,
     usePandoc: false,
-    yaml: false,
+    printYamlToConsole: false,
+    extraFiles: [],
   };
 
   const reporter = new Reporter(reporterArgs, logger);
-  let html = reporter.generateHtmlReport(
-    inspectionOutput,
-    "inspection.html",
-    false,
-  );
+  let html = reporter.generateHtmlReport(inspectionOutput, "inspection.html");
   let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html);
   return {
     html: html,
-- 
GitLab


From 9284452dc8021f1035b1edaf2150a6280231e29f Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Fri, 21 Mar 2025 17:45:55 +0100
Subject: [PATCH 7/9] refactor: Remove superfluous call to generateHtmlReport

---
 src/commands/reporterCommand.ts | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts
index 4a81558..a98dae1 100644
--- a/src/commands/reporterCommand.ts
+++ b/src/commands/reporterCommand.ts
@@ -97,14 +97,6 @@ async function runReporter(args: ParsedArgsReporter) {
     logger,
   );
 
-  const htmlReport = reporter.generateHtmlReport(collectionData);
-
-  // Print to console when no output file is specified
-  if (!args.outputFile) {
-    logger.info(htmlReport);
-    return;
-  }
-
   const fileExtension = path.extname(args.outputFile).toLowerCase();
   const outputDir = path.dirname(args.outputFile);
 
-- 
GitLab


From 3436325db86bccfefaf7bcad4de5ae4776701302 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Fri, 21 Mar 2025 17:46:29 +0100
Subject: [PATCH 8/9] refactor: Improve typing of arguments for RunReporter
 function

---
 src/commands/reporterCommand.ts | 48 ++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 25 deletions(-)

diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts
index a98dae1..dbf11cc 100644
--- a/src/commands/reporterCommand.ts
+++ b/src/commands/reporterCommand.ts
@@ -65,17 +65,37 @@ export default {
       .nargs("output-file", 1)
       .alias("output-file", "o")
       .string("output-file")
-      .check((argv: ParsedArgsReporter) => {
+      .check((argv: any) => {
         if (!argv._[1]) {
           return "Error: You must provide a file name or path";
         }
         return true;
       });
   },
-  handler: async (argv: any) => await runReporter(transformArgsToObject(argv)),
+  handler: async (argv: any) => {
+    const runReporterArgs: RunReporterArgs = {
+      inspectionJsonPath: argv._[1],
+      htmlTemplate: argv.htmlTemplate,
+      officeTemplate: argv.officeTemplate,
+      usePandoc: argv.usePandoc,
+      extraFiles: argv.extraFile || [],
+      outputFile: argv.outputFile,
+    };
+
+    await runReporter(runReporterArgs);
+  },
+};
+
+type RunReporterArgs = {
+  htmlTemplate?: string;
+  officeTemplate?: string;
+  usePandoc: boolean;
+  extraFiles: string[];
+  outputFile?: string;
+  inspectionJsonPath: string;
 };
 
-async function runReporter(args: ParsedArgsReporter) {
+async function runReporter(args: RunReporterArgs) {
   const logger = create({});
 
   const collectionData = JSON.parse(
@@ -123,25 +143,3 @@ async function runReporter(args: ParsedArgsReporter) {
       break;
   }
 }
-
-function transformArgsToObject(parsingResult: any): ParsedArgsReporter {
-  return {
-    _: parsingResult._ as string[],
-    inspectionJsonPath: parsingResult._[1] as string,
-    outputFile: parsingResult["output-file"] as string,
-    htmlTemplate: parsingResult["html-template"] as string | undefined,
-    officeTemplate: parsingResult["office-template"] as string | undefined,
-    extraFiles: parsingResult["extra-file"] as any[] | undefined,
-    usePandoc: parsingResult["use-pandoc"] as boolean | undefined,
-  };
-}
-
-interface ParsedArgsReporter {
-  _: (string | number)[];
-  inspectionJsonPath: string;
-  outputFile?: string;
-  htmlTemplate?: string;
-  officeTemplate?: string;
-  extraFiles?: any[];
-  usePandoc?: boolean;
-}
-- 
GitLab


From 7a7433c792feb7a9ea8f24dc477c19c62f5611e5 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Fri, 21 Mar 2025 17:46:46 +0100
Subject: [PATCH 9/9] refactor: add logging of filenames and change default log
 level

---
 src/lib/logger.ts        | 5 +++--
 src/reporter/reporter.ts | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/lib/logger.ts b/src/lib/logger.ts
index 4f8096b..d120b19 100644
--- a/src/lib/logger.ts
+++ b/src/lib/logger.ts
@@ -39,11 +39,12 @@ const create = (
   options: CreateLoggerOptions,
   outputFilePath?: string,
   defaultMeta?: {},
+  defaultLogLevel = "info",
 ): Logger => {
   const defaults: CreateLoggerOptions = {
     console: {
       silent: false,
-      level: "debug",
+      level: defaultLogLevel,
       stderrLevels: ["error", "debug", "info", "warn"],
       format: process.stdout.isTTY
         ? format.combine(format.colorize(), format.simple(), format.metadata())
@@ -51,7 +52,7 @@ const create = (
     },
     file: {
       enabled: true,
-      level: "silly",
+      level: defaultLogLevel,
       format: format.combine(format.json(), format.metadata()),
     },
   };
diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts
index fcb452d..c0e081c 100644
--- a/src/reporter/reporter.ts
+++ b/src/reporter/reporter.ts
@@ -229,7 +229,9 @@ export class Reporter {
 
   saveFile(filename: string, data: any) {
     if (this.options.outputPath) {
-      fs.writeFileSync(path.join(this.options.outputPath, filename), data);
+      let effectivePath = path.join(this.options.outputPath, filename);
+      this.logger.debug(`Saving file to ${effectivePath}`);
+      fs.writeFileSync(effectivePath, data);
     }
   }
 }
-- 
GitLab