From 1b9115f9b5e8f332cd8635b3434c26fd41f8eed1 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 11:54:09 +0100
Subject: [PATCH 1/5] feature: add declaration files to compiled output

---
 tsconfig.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tsconfig.json b/tsconfig.json
index 469d179..d15a775 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -1,6 +1,7 @@
 {
   "compilerOptions": {
     "outDir": "./build",
+    "declaration": true,
     "allowJs": true,
     "target": "es2020",
     "moduleResolution": "nodenext",
-- 
GitLab


From d3916bd411a235384be0bf12cab5419822a844cd Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 12:44:56 +0100
Subject: [PATCH 2/5] refactor: Update the EvidenceCollectorForm.vue to align
 with the form in wc-online. Add additional type for runCollection() to
 decouple the type used in form from the one used in the function

---
 .../src/components/EvidenceCollectorForm.vue  | 91 ++++++++++++-------
 src/server/runCollection.ts                   | 50 ++++++----
 src/server/server.ts                          | 58 +++++++-----
 3 files changed, 127 insertions(+), 72 deletions(-)

diff --git a/frontend/src/components/EvidenceCollectorForm.vue b/frontend/src/components/EvidenceCollectorForm.vue
index 3ea2f62..53add89 100644
--- a/frontend/src/components/EvidenceCollectorForm.vue
+++ b/frontend/src/components/EvidenceCollectorForm.vue
@@ -18,11 +18,15 @@
           />
           <TextElement
             name="website_url"
-            :rules="['required']"
+            field-name="url"
+            :rules="['required', 'url']"
             input-type="url"
+            :debounce="500"
             placeholder="http://example.com"
+            :floating="false"
             :columns="{ lg: { container: 12 } }"
-            label="Website to scan"
+            label="Website for collection (mandatory)"
+            info="Enter the URL of the website you want to collect evidence from, e.g., `http://example.com`"
           />
           <GroupElement name="container2_1">
             <GroupElement
@@ -30,15 +34,16 @@
               :columns="{ default: { container: 8 } }"
             >
               <TextElement
-                name="max_option_input"
+                name="max_additional_links"
                 input-type="number"
-                :rules="['nullable', 'min:0', 'max:150', 'integer']"
+                onkeydown="return (!(event.key === 'e' || event.key === '+' || event.key === '.'))"
+                :rules="['required', 'nullable', 'min:0', 'max:150', 'integer']"
                 label="Maximum additional links to browse"
                 autocomplete="off"
                 placeholder="0"
                 default="0"
                 :floating="false"
-                info="The maximum number of links that will be browsed."
+                info="Set the maximum number of additional links to browse beyond the specified URLs"
               />
             </GroupElement>
             <GroupElement
@@ -46,8 +51,10 @@
               :columns="{ default: { container: 4 } }"
             >
               <ToggleElement
-                label="Run with TestSSL"
-                name="testssl_input_option"
+                label="Check the security of the encrypted connection"
+                name="run_testSSL"
+                info="Enable this option to assess the website's SSL/TLS configuration"
+                info-position="left"
               />
             </GroupElement>
           </GroupElement>
@@ -55,46 +62,56 @@
           <GroupElement name="container2">
             <GroupElement name="column1" :columns="{ container: 6 }">
               <TextElement
-                name="sleep_option_input"
+                name="post_page_load_delay_seconds"
                 input-type="number"
-                label="Delay After Page Load"
-                :rules="['nullable', 'min:0', 'integer']"
+                label="Delay after page load"
+                onkeydown="return (!(event.key === 'e' || event.key === '+' || event.key === '.'))"
+                :rules="['nullable', 'min:1', 'integer']"
+                :messages="{
+                  min: 'A minimum delay of 1 second is necessary to ensure the program has enough time to load the page',
+                }"
                 autocomplete="off"
-                info="Amount of sleep in milliseconds after a page load"
-                placeholder="3000"
-                default="3000"
+                info="Specify the time (in seconds) to wait after each page load"
+                placeholder="3"
+                default="3"
                 :floating="false"
-                :addons="{ after: 'milliseconds' }"
+                :addons="{ after: 'seconds' }"
               />
             </GroupElement>
             <GroupElement name="column2" :columns="{ container: 6 }">
               <TextElement
-                name="timout_input_option"
-                label="Page Load Timeout"
+                name="timeout_seconds"
+                label="Page load timeout"
                 input-type="number"
+                onkeydown="return (!(event.key === 'e' || event.key === '+' || event.key === '.'))"
                 :rules="['nullable', 'min:0', 'integer']"
                 autocomplete="off"
-                info="Page load imeout in milliseconds "
+                info="Set the timeout (in seconds) for page loads. Set to 0 to disable the timeout"
+                info-position="left"
                 placeholder="0"
                 :floating="false"
                 default="0"
-                :addons="{ after: 'milliseconds' }"
+                :addons="{ after: 'seconds' }"
                 :attrs="{ 'hide-spin-buttons': 'true' }"
               />
             </GroupElement>
           </GroupElement>
           <TextElement
-            name="seed_option_input"
+            name="link_selection_seed"
             label="Seed for deterministic link selection"
+            info="The software randomly samples links from the website. Setting a seed value ensures you get the same random sample each time – useful for reproducible results."
             placeholder="no seed"
             autocomplete="off"
             :floating="false"
           />
           <ListElement
-            :add-class="{ container: ['bg-slate-50', 'p-2', 'rounded-md'] }"
-            name="first_party_uri_option_input"
+            :add-class="{
+              container: ['bg-eu-neutral-40', 'p-2', 'rounded-md'],
+            }"
+            name="first_party_uris"
             add-text="+ Add URI"
-            label="URIs considered First-Party"
+            label="URIs considered first party"
+            info="Add URIs that should be considered as first-party for the collected website"
           >
             <!-- @vue-ignore -->
             <template #default="{ index }">
@@ -102,21 +119,30 @@
                 :name="index"
                 input-type="url"
                 placeholder="http://www.example.com"
+                :debounce="500"
+                :floating="false"
+                field-name="uri"
                 :rules="['url', 'nullable']"
               />
             </template>
           </ListElement>
           <ListElement
-            :add-class="{ container: ['bg-slate-50', 'p-2', 'rounded-md'] }"
-            name="browse_link_option_input"
-            label="Links to include in collection"
-            add-text="+ Add Link"
+            :add-class="{
+              container: ['bg-eu-neutral-40', 'p-2', 'rounded-md'],
+            }"
+            name="links_to_include"
+            label="Web pages to include in collection"
+            add-text="+ Add web page"
+            info="Add specific pages to include in the collection process"
           >
             <!-- @vue-ignore -->
             <template #default="{ index }">
               <TextElement
                 :name="index"
                 input-type="url"
+                field-name="link"
+                :debounce="500"
+                :floating="false"
                 placeholder="http://example.com/test.html"
                 :rules="['url', 'nullable']"
               />
@@ -124,10 +150,13 @@
           </ListElement>
           <GroupElement
             name="cookies"
-            :add-class="{ container: ['bg-slate-50', 'p-2', 'rounded-md'] }"
+            :add-class="{
+              container: ['bg-eu-neutral-40', 'p-2', 'rounded-md'],
+            }"
           >
             <MatrixElement
-              name="cookie_input"
+              name="cookies"
+              info="Set cookies that will be used by the browser during the evidence collection process"
               :cols="[
                 {
                   label: 'Key',
@@ -148,7 +177,7 @@
               :input-type="{
                 type: 'text',
               }"
-              add-text="+ Add Cookie"
+              add-text="+ Add cookie"
               label="Cookies"
             />
           </GroupElement>
@@ -427,7 +456,7 @@ const downloadHtml = () => {
   --vf-primary: #3860ed;
   --vf-primary-darker: #0a1f6c;
   --vf-color-on-primary: #ffffff;
-  --vf-danger: #ffffff;
+  --vf-danger: #da1e28;
   --vf-danger-lighter: #da1e28;
   --vf-success: #ffffff;
   --vf-success-lighter: #24a148;
@@ -594,7 +623,7 @@ const downloadHtml = () => {
   --vf-bg-checkbox-success: #ffffff;
   --vf-bg-disabled: var(--vf-gray-200);
   --vf-bg-selected: #1118270d;
-  --vf-bg-passive: var(--vf-gray-300);
+  --vf-bg-passive: var(--vf-gray-400);
   --vf-bg-icon: var(--vf-gray-500);
   --vf-bg-danger: var(--vf-danger-lighter);
   --vf-bg-success: var(--vf-success-lighter);
diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index d5151bf..bde9122 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -1,11 +1,24 @@
 import { Reporter, ReporterArguments } from "../reporter/reporter.js";
 import { Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
-import { Cookie, StartCollectionRequestBody } from "./server.js";
+import { Cookie } from "./server.js";
 import { Logger } from "winston";
 
+export interface RunCollectionArguments {
+  website_url: string;
+  max_additional_links: number;
+  post_page_load_delay_milliseconds: number;
+  timeout_milliseconds: number;
+  first_party_uris: string[];
+  links_to_include: string[];
+  link_selection_seed: string;
+  run_testSSL: boolean;
+  cookies: Cookie[];
+  use_DNT: boolean;
+}
+
 export async function runCollection(
-  args: StartCollectionRequestBody,
+  args: RunCollectionArguments,
   browser_options: any[],
   logger: Logger,
 ): Promise<{}> {
@@ -44,27 +57,26 @@ export async function generateHtmlAndPdf(inspectionOutput) {
   };
 }
 
+/**
+ * Constructs a JSON object containing all Arguments as it is expected by the underlying implementation.
+ */
 function sanitizeInputAndConstructCollectionArgs(
-  args: StartCollectionRequestBody,
+  args: RunCollectionArguments,
   browser_options: any[],
 ): {} {
-  let sleepOption = isEmptyNumber(args.sleep_option_input)
+  let sleepOption = isEmptyNumber(args.post_page_load_delay_milliseconds)
     ? 3000
-    : args.sleep_option_input;
-  let pageTimeout = isEmptyNumber(args.timeout_input_option)
+    : args.post_page_load_delay_milliseconds;
+  let pageTimeout = isEmptyNumber(args.timeout_milliseconds)
     ? 0
-    : args.timeout_input_option;
-  let maxLinks = isEmptyNumber(args.max_option_input)
+    : args.timeout_milliseconds;
+  let maxLinks = isEmptyNumber(args.max_additional_links)
     ? 0
-    : args.max_option_input;
+    : args.max_additional_links;
 
   // Links and URIs can be null when send by the backend. Therefore, we filter.
-  let browseLinks = args.browse_link_option_input.filter(
-    (value) => value != null,
-  );
-  let firstPartyUris = args.first_party_uri_option_input.filter(
-    (value) => value != null,
-  );
+  let browseLinks = args.links_to_include.filter((value) => value != null);
+  let firstPartyUris = args.first_party_uris.filter((value) => value != null);
 
   // Check that Links are URLs and FirstPartyUris only consist of domains.
   let areAllExtraLinksUrls = browseLinks.every((link: string) =>
@@ -81,7 +93,7 @@ function sanitizeInputAndConstructCollectionArgs(
     throw new Error("Not all extra links are invalid.");
   }
 
-  let sanitizedCookies = args.cookie_input
+  let sanitizedCookies = args.cookies
     .filter((cookie: Cookie) => cookie.value != null && cookie.key != null)
     .filter(
       (cookie: Cookie) =>
@@ -116,12 +128,12 @@ function sanitizeInputAndConstructCollectionArgs(
     sleep: sleepOption,
     firstPartyUri: firstPartyUris,
     pageTimeout: pageTimeout,
-    testssl: args.testssl_input_option,
-    seed: args.seed_option_input,
+    testssl: args.run_testSSL,
+    seed: args.link_selection_seed,
     setCookie: cookieString,
     headless: true,
     screenshots: true,
-    dnt: false,
+    dnt: args.use_DNT,
     dntJs: false,
     output: undefined,
     overwrite: false,
diff --git a/src/server/server.ts b/src/server/server.ts
index 854b44d..5dc11a2 100644
--- a/src/server/server.ts
+++ b/src/server/server.ts
@@ -6,7 +6,11 @@ import express, {
   Router,
 } from "express";
 import bodyParser from "body-parser";
-import { generateHtmlAndPdf, runCollection } from "./runCollection.js";
+import {
+  generateHtmlAndPdf,
+  runCollection,
+  RunCollectionArguments,
+} from "./runCollection.js";
 import path from "path";
 import { create } from "../lib/logger.js";
 import crypto from "crypto";
@@ -81,32 +85,42 @@ function configureRoutes(browser_options: any[]): Router {
       let requestId = crypto.randomBytes(16).toString("hex");
       let requestLogger = create({}, undefined, { request_id: requestId });
 
+      const runCollectionArgs: RunCollectionArguments = {
+        website_url: req.body.website_url,
+        max_additional_links: req.body.max_additional_links,
+        post_page_load_delay_milliseconds: Math.floor(
+          req.body.post_page_load_delay_seconds * 1000, // request carries seconds; collector expects ms
+        ),
+        timeout_milliseconds: Math.floor(req.body.timeout_seconds * 1000), // seconds -> ms
+        first_party_uris: req.body.first_party_uris,
+        links_to_include: req.body.links_to_include,
+        link_selection_seed: req.body.link_selection_seed,
+        run_testSSL: req.body.run_testSSL,
+        cookies: req.body.cookies,
+        use_DNT: false,
+      };
+
       try {
-        const website_url = req.body.website_url;
         requestLogger.info(`Received /start-collection request`, {
-          website_url: req.body.website_url,
-          max_links_option: req.body.max_option_input,
-          sleep_option_input: req.body.sleep_option_input,
-          timeout_input_option: req.body.timeout_input_option,
-          first_party_uri_option_input: req.body.first_party_uri_option_input,
-          browse_link_option_input: req.body.browse_link_option_input,
-          seed_option_input: req.body.seed_option_input,
-          testssl_input_option: req.body.testssl_input_option,
-          cookie_input: req.body.cookie_input,
+          ...runCollectionArgs,
         });
 
-        if (!URL.canParse(website_url)) {
+        if (!URL.canParse(runCollectionArgs.website_url)) {
           res.status(400).send({ reason: "malformatted_url" });
           return;
         }
 
-        requestLogger.log("info", `Running collection for: ${website_url}`);
+        requestLogger.log(
+          "info",
+          `Running collection for: ${runCollectionArgs.website_url}`,
+        );
 
         let collectionOutput = await runCollection(
-          req.body,
+          runCollectionArgs,
           browser_options,
           requestLogger,
         );
+
         let htmlAndPdf = await generateHtmlAndPdf(collectionOutput);
         res.send(htmlAndPdf);
         requestLogger.info("Finished serving request");
@@ -141,14 +155,14 @@ function handleShutdownSignal(signal: string) {
 
 export interface StartCollectionRequestBody {
   website_url: string;
-  max_option_input: number;
-  sleep_option_input: number;
-  timeout_input_option: number;
-  first_party_uri_option_input: string[];
-  browse_link_option_input: string[];
-  seed_option_input: string;
-  testssl_input_option: boolean;
-  cookie_input: Cookie[];
+  max_additional_links: number;
+  post_page_load_delay_seconds: number;
+  timeout_seconds: number;
+  first_party_uris: string[];
+  links_to_include: string[];
+  link_selection_seed: string;
+  run_testSSL: boolean;
+  cookies: Cookie[];
 }
 
 export interface Cookie {
-- 
GitLab


From 1797dc6ad12a8d533d35e473b2e15c9dba1e07f9 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 16:33:37 +0100
Subject: [PATCH 3/5] Add code to generate Report with DNT-enabled and
 DNT-disabled output

---
 src/reporter/reporter.ts    |  2 ++
 src/server/runCollection.ts | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts
index 774f7cc..e0eed4f 100644
--- a/src/reporter/reporter.ts
+++ b/src/reporter/reporter.ts
@@ -88,6 +88,7 @@ export class Reporter {
     filename = "inspection.html",
     log = true,
     template = "../assets/template.pug",
+    extraData?,
   ) {
     const html_template =
       this.args["html-template"] || path.join(__dirname, template);
@@ -106,6 +107,7 @@ export class Reporter {
           require.resolve("github-markdown-css/github-markdown.css"),
         ),
         filterOptions: { marked: {} },
+        extra: extraData,
       }),
     );
 
diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index bde9122..83fdb9f 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -3,6 +3,7 @@ import { Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
 import { Cookie } from "./server.js";
 import { Logger } from "winston";
+import { template } from "lodash";
 
 export interface RunCollectionArguments {
   website_url: string;
@@ -38,7 +39,7 @@ export async function runCollection(
   return inspector.run();
 }
 
-export async function generateHtmlAndPdf(inspectionOutput) {
+export async function generateHtmlAndPdf(inspectionOutput, extraOutput?) {
   let reporterArgs: ReporterArguments = {
     html: true,
     pdf: true,
@@ -49,7 +50,13 @@ export async function generateHtmlAndPdf(inspectionOutput) {
   };
 
   const reporter = new Reporter(reporterArgs);
-  let html = reporter.generateHtml(inspectionOutput, "inspection.html", false);
+  let html = reporter.generateHtml(
+    inspectionOutput,
+    "inspection.html",
+    false,
+    extraOutput ? "path/to/alternative/template" : undefined,
+    extraOutput,
+  );
   let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html);
   return {
     html: html,
-- 
GitLab


From eaf2142b55f699815c0d51abeb01e2ee420bcd0f Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 17:15:18 +0100
Subject: [PATCH 4/5] refactor: Add more types to Collector

---
 src/collector/index.ts           | 10 +++++++---
 src/collector/output.ts          |  6 ++++++
 src/commands/collectorCommand.ts |  4 ++--
 src/inspector/inspector.ts       |  3 ++-
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/collector/index.ts b/src/collector/index.ts
index 8c28850..ab23398 100644
--- a/src/collector/index.ts
+++ b/src/collector/index.ts
@@ -1,5 +1,9 @@
 import { createOutputDirectory } from "./io.js";
-import { CreateOutputArgs, createOutputObject } from "./output.js";
+import {
+  CollectorOutput,
+  CreateOutputArgs,
+  createOutputObject,
+} from "./output.js";
 
 import { testSSL, testHttps } from "./connection.js";
 import {
@@ -17,13 +21,13 @@ import { Logger } from "winston";
 import { PageSession } from "./page-session.js";
 
 export interface CollectionResult {
-  output: any;
+  output: CollectorOutput;
   pageSession: PageSession;
   source: string;
 }
 
 export class Collector {
-  private output: any;
+  private output: CollectorOutput;
   private browserSession: BrowserSession;
   private pageSession: PageSession;
   private logger: Logger;
diff --git a/src/collector/output.ts b/src/collector/output.ts
index 9796429..705be3f 100644
--- a/src/collector/output.ts
+++ b/src/collector/output.ts
@@ -74,6 +74,12 @@ export interface CollectorOutput {
   websockets: any;
   start_time: Date;
   end_time: Date | null;
+  websocketLog?: any;
+  screenshots?: {
+    screenshot_top: string;
+    screenshot_bottom: string;
+    screenshot_full: string;
+  };
 }
 
 export function createOutputObject(args: CreateOutputArgs): CollectorOutput {
diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts
index 4cee5ad..9989154 100644
--- a/src/commands/collectorCommand.ts
+++ b/src/commands/collectorCommand.ts
@@ -173,7 +173,7 @@ export default {
   handler: async (argv) => await runCollector(argv),
 };
 
-async function runCollector(args: ParsedArgsCollector): Promise<any> {
+async function runCollector(args: CollectorCommandArguments): Promise<any> {
   const logger = create({}, args.output);
 
   const collector = new Collector(args, logger);
@@ -215,7 +215,7 @@ async function runCollector(args: ParsedArgsCollector): Promise<any> {
   return inspectionResult;
 }
 
-interface ParsedArgsCollector {
+interface CollectorCommandArguments {
   _: (string | number)[];
   command: string;
   max: number;
diff --git a/src/inspector/inspector.ts b/src/inspector/inspector.ts
index 6b6d87e..5f0b445 100644
--- a/src/inspector/inspector.ts
+++ b/src/inspector/inspector.ts
@@ -5,12 +5,13 @@ import { isFirstParty } from "../lib/tools.js";
 import { PageSession } from "../collector/page-session.js";
 import { CollectedCookie } from "../collector/recorder/cookie-recorder.js";
 import { EnhancedCookie } from "../collector/collector_inspector.js";
+import { CollectorOutput } from "../collector/output.js";
 
 class Inspector {
   private output: any;
   private pageSession: PageSession;
 
-  constructor(pageSession: PageSession, output: any) {
+  constructor(pageSession: PageSession, output: CollectorOutput) {
     this.output = output;
     this.pageSession = pageSession;
   }
-- 
GitLab


From f28f635dcee39466e724c8e2c728b539f285a149 Mon Sep 17 00:00:00 2001
From: Felix Woestmann <felix.wostmann@edps.europa.eu>
Date: Tue, 11 Mar 2025 17:15:18 +0100
Subject: [PATCH 5/5] refactor: Remove unused lodash import from runCollection.ts

---
 src/server/runCollection.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts
index 83fdb9f..fda49bd 100644
--- a/src/server/runCollection.ts
+++ b/src/server/runCollection.ts
@@ -3,7 +3,6 @@ import { Collector } from "../collector/index.js";
 import Inspector from "../inspector/inspector.js";
 import { Cookie } from "./server.js";
 import { Logger } from "winston";
-import { template } from "lodash";
 
 export interface RunCollectionArguments {
   website_url: string;
-- 
GitLab