From 1b9115f9b5e8f332cd8635b3434c26fd41f8eed1 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 11:54:09 +0100 Subject: [PATCH 1/5] feature: add declaration files to compiled output --- tsconfig.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tsconfig.json b/tsconfig.json index 469d179..d15a775 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,6 +1,7 @@ { "compilerOptions": { "outDir": "./build", + "declaration": true, "allowJs": true, "target": "es2020", "moduleResolution": "nodenext", -- GitLab From d3916bd411a235384be0bf12cab5419822a844cd Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 12:44:56 +0100 Subject: [PATCH 2/5] refactor: Update the EvidenceCollectorForm.vue to align with the form in wc-online. Add additional type for runCollection() to decouple the type used in form from the one used in the function --- .../src/components/EvidenceCollectorForm.vue | 91 ++++++++++++------- src/server/runCollection.ts | 50 ++++++---- src/server/server.ts | 58 +++++++----- 3 files changed, 127 insertions(+), 72 deletions(-) diff --git a/frontend/src/components/EvidenceCollectorForm.vue b/frontend/src/components/EvidenceCollectorForm.vue index 3ea2f62..53add89 100644 --- a/frontend/src/components/EvidenceCollectorForm.vue +++ b/frontend/src/components/EvidenceCollectorForm.vue @@ -18,11 +18,15 @@ /> <TextElement name="website_url" - :rules="['required']" + field-name="url" + :rules="['required', 'url']" input-type="url" + :debounce="500" placeholder="http://example.com" + :floating="false" :columns="{ lg: { container: 12 } }" - label="Website to scan" + label="Website for collection (mandatory)" + info="Enter the URL of the website you want to collect evidence from, e.g., `http://example.com`" /> <GroupElement name="container2_1"> <GroupElement @@ -30,15 +34,16 @@ :columns="{ default: { container: 8 } }" > <TextElement - name="max_option_input" + 
name="max_additional_links" input-type="number" - :rules="['nullable', 'min:0', 'max:150', 'integer']" + onkeydown="return (!(event.key === 'e' || event.key === '+' || event.key === '.'))" + :rules="['required', 'nullable', 'min:0', 'max:150', 'integer']" label="Maximum additional links to browse" autocomplete="off" placeholder="0" default="0" :floating="false" - info="The maximum number of links that will be browsed." + info="Set the maximum number of additional links to browse beyond the specified URLs" /> </GroupElement> <GroupElement @@ -46,8 +51,10 @@ :columns="{ default: { container: 4 } }" > <ToggleElement - label="Run with TestSSL" - name="testssl_input_option" + label="Check the security of the encrypted connection" + name="run_testSSL" + info="Enable this option to assess the website's SSL/TLS configuration" + info-position="left" /> </GroupElement> </GroupElement> @@ -55,46 +62,56 @@ <GroupElement name="container2"> <GroupElement name="column1" :columns="{ container: 6 }"> <TextElement - name="sleep_option_input" + name="post_page_load_delay_seconds" input-type="number" - label="Delay After Page Load" - :rules="['nullable', 'min:0', 'integer']" + label="Delay after page load" + onkeydown="return (!(event.key === 'e' || event.key === '+' || event.key === '.'))" + :rules="['nullable', 'min:1', 'integer']" + :messages="{ + min: 'A minimum delay of 1 second is necessary to ensure the program has enough time to load the page', + }" autocomplete="off" - info="Amount of sleep in milliseconds after a page load" - placeholder="3000" - default="3000" + info="Specify the time (in seconds) to wait after each page load" + placeholder="3" + default="3" :floating="false" - :addons="{ after: 'milliseconds' }" + :addons="{ after: 'seconds' }" /> </GroupElement> <GroupElement name="column2" :columns="{ container: 6 }"> <TextElement - name="timout_input_option" - label="Page Load Timeout" + name="timeout_seconds" + label="Page load timeout" input-type="number" + 
onkeydown="return (!(event.key === 'e' || event.key === '+' || event.key === '.'))" :rules="['nullable', 'min:0', 'integer']" autocomplete="off" - info="Page load imeout in milliseconds " + info="Set the timeout (in seconds) for page loads. Set to 0 to disable the timeout" + info-position="left" placeholder="0" :floating="false" default="0" - :addons="{ after: 'milliseconds' }" + :addons="{ after: 'seconds' }" :attrs="{ 'hide-spin-buttons': 'true' }" /> </GroupElement> </GroupElement> <TextElement - name="seed_option_input" + name="link_selection_seed" label="Seed for deterministic link selection" + info="The software randomly samples links from the website. Setting a seed value ensures you get the same random sample each time – useful for reproducible results." placeholder="no seed" autocomplete="off" :floating="false" /> <ListElement - :add-class="{ container: ['bg-slate-50', 'p-2', 'rounded-md'] }" - name="first_party_uri_option_input" + :add-class="{ + container: ['bg-eu-neutral-40', 'p-2', 'rounded-md'], + }" + name="first_party_uris" add-text="+ Add URI" - label="URIs considered First-Party" + label="URIs considered first party" + info="Add URIs that should be considered as first-party for the collected website" > <!-- @vue-ignore --> <template #default="{ index }"> @@ -102,21 +119,30 @@ :name="index" input-type="url" placeholder="http://www.example.com" + :debounce="500" + :floating="false" + field-name="uri" :rules="['url', 'nullable']" /> </template> </ListElement> <ListElement - :add-class="{ container: ['bg-slate-50', 'p-2', 'rounded-md'] }" - name="browse_link_option_input" - label="Links to include in collection" - add-text="+ Add Link" + :add-class="{ + container: ['bg-eu-neutral-40', 'p-2', 'rounded-md'], + }" + name="links_to_include" + label="Web pages to include in collection" + add-text="+ Add web page" + info="Add specific pages to include in the collection process" > <!-- @vue-ignore --> <template #default="{ index }"> <TextElement 
:name="index" input-type="url" + field-name="link" + :debounce="500" + :floating="false" placeholder="http://example.com/test.html" :rules="['url', 'nullable']" /> @@ -124,10 +150,13 @@ </ListElement> <GroupElement name="cookies" - :add-class="{ container: ['bg-slate-50', 'p-2', 'rounded-md'] }" + :add-class="{ + container: ['bg-eu-neutral-40', 'p-2', 'rounded-md'], + }" > <MatrixElement - name="cookie_input" + name="cookies" + info="Set cookies that will be used by the browser during the evidence collection process" :cols="[ { label: 'Key', @@ -148,7 +177,7 @@ :input-type="{ type: 'text', }" - add-text="+ Add Cookie" + add-text="+ Add cookie" label="Cookies" /> </GroupElement> @@ -427,7 +456,7 @@ const downloadHtml = () => { --vf-primary: #3860ed; --vf-primary-darker: #0a1f6c; --vf-color-on-primary: #ffffff; - --vf-danger: #ffffff; + --vf-danger: #da1e28; --vf-danger-lighter: #da1e28; --vf-success: #ffffff; --vf-success-lighter: #24a148; @@ -594,7 +623,7 @@ const downloadHtml = () => { --vf-bg-checkbox-success: #ffffff; --vf-bg-disabled: var(--vf-gray-200); --vf-bg-selected: #1118270d; - --vf-bg-passive: var(--vf-gray-300); + --vf-bg-passive: var(--vf-gray-400); --vf-bg-icon: var(--vf-gray-500); --vf-bg-danger: var(--vf-danger-lighter); --vf-bg-success: var(--vf-success-lighter); diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index d5151bf..bde9122 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -1,11 +1,24 @@ import { Reporter, ReporterArguments } from "../reporter/reporter.js"; import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; -import { Cookie, StartCollectionRequestBody } from "./server.js"; +import { Cookie } from "./server.js"; import { Logger } from "winston"; +export interface RunCollectionArguments { + website_url: string; + max_additional_links: number; + post_page_load_delay_milliseconds: number; + timeout_milliseconds: number; + first_party_uris: 
string[]; + links_to_include: string[]; + link_selection_seed: string; + run_testSSL: boolean; + cookies: Cookie[]; + use_DNT: boolean; +} + export async function runCollection( - args: StartCollectionRequestBody, + args: RunCollectionArguments, browser_options: any[], logger: Logger, ): Promise<{}> { @@ -44,27 +57,26 @@ export async function generateHtmlAndPdf(inspectionOutput) { }; } +/** + * Constructs a JSON object containing all Arguments as it is expected by the underlying implementation. + */ function sanitizeInputAndConstructCollectionArgs( - args: StartCollectionRequestBody, + args: RunCollectionArguments, browser_options: any[], ): {} { - let sleepOption = isEmptyNumber(args.sleep_option_input) + let sleepOption = isEmptyNumber(args.post_page_load_delay_milliseconds) ? 3000 - : args.sleep_option_input; - let pageTimeout = isEmptyNumber(args.timeout_input_option) + : args.post_page_load_delay_milliseconds; + let pageTimeout = isEmptyNumber(args.timeout_milliseconds) ? 0 - : args.timeout_input_option; - let maxLinks = isEmptyNumber(args.max_option_input) + : args.timeout_milliseconds; + let maxLinks = isEmptyNumber(args.max_additional_links) ? 0 - : args.max_option_input; + : args.max_additional_links; // Links and URIs can be null when send by the backend. Therefore, we filter. - let browseLinks = args.browse_link_option_input.filter( - (value) => value != null, - ); - let firstPartyUris = args.first_party_uri_option_input.filter( - (value) => value != null, - ); + let browseLinks = args.links_to_include.filter((value) => value != null); + let firstPartyUris = args.first_party_uris.filter((value) => value != null); // Check that Links are URLs and FirstPartyUris only consist of domains. 
let areAllExtraLinksUrls = browseLinks.every((link: string) => @@ -81,7 +93,7 @@ function sanitizeInputAndConstructCollectionArgs( throw new Error("Not all extra links are invalid."); } - let sanitizedCookies = args.cookie_input + let sanitizedCookies = args.cookies .filter((cookie: Cookie) => cookie.value != null && cookie.key != null) .filter( (cookie: Cookie) => @@ -116,12 +128,12 @@ function sanitizeInputAndConstructCollectionArgs( sleep: sleepOption, firstPartyUri: firstPartyUris, pageTimeout: pageTimeout, - testssl: args.testssl_input_option, - seed: args.seed_option_input, + testssl: args.run_testSSL, + seed: args.link_selection_seed, setCookie: cookieString, headless: true, screenshots: true, - dnt: false, + dnt: args.use_DNT, dntJs: false, output: undefined, overwrite: false, diff --git a/src/server/server.ts b/src/server/server.ts index 854b44d..5dc11a2 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -6,7 +6,11 @@ import express, { Router, } from "express"; import bodyParser from "body-parser"; -import { generateHtmlAndPdf, runCollection } from "./runCollection.js"; +import { + generateHtmlAndPdf, + runCollection, + RunCollectionArguments, +} from "./runCollection.js"; import path from "path"; import { create } from "../lib/logger.js"; import crypto from "crypto"; @@ -81,32 +85,42 @@ function configureRoutes(browser_options: any[]): Router { let requestId = crypto.randomBytes(16).toString("hex"); let requestLogger = create({}, undefined, { request_id: requestId }); + const runCollectionArgs: RunCollectionArguments = { + website_url: req.body.website_url, + max_additional_links: req.body.max_additional_links, + post_page_load_delay_milliseconds: Math.floor( + req.body.post_page_load_delay_seconds * 1000, + ), + timeout_milliseconds: Math.floor(req.body.timeout_seconds * 1000), + first_party_uris: req.body.first_party_uris, + links_to_include: req.body.links_to_include, + link_selection_seed: req.body.link_selection_seed, + run_testSSL: 
req.body.run_testSSL, + cookies: req.body.cookies, + use_DNT: false, + }; + try { - const website_url = req.body.website_url; requestLogger.info(`Received /start-collection request`, { - website_url: req.body.website_url, - max_links_option: req.body.max_option_input, - sleep_option_input: req.body.sleep_option_input, - timeout_input_option: req.body.timeout_input_option, - first_party_uri_option_input: req.body.first_party_uri_option_input, - browse_link_option_input: req.body.browse_link_option_input, - seed_option_input: req.body.seed_option_input, - testssl_input_option: req.body.testssl_input_option, - cookie_input: req.body.cookie_input, + ...runCollectionArgs, }); - if (!URL.canParse(website_url)) { + if (!URL.canParse(runCollectionArgs.website_url)) { res.status(400).send({ reason: "malformatted_url" }); return; } - requestLogger.log("info", `Running collection for: ${website_url}`); + requestLogger.log( + "info", + `Running collection for: ${runCollectionArgs.website_url}`, + ); let collectionOutput = await runCollection( - req.body, + runCollectionArgs, browser_options, requestLogger, ); + let htmlAndPdf = await generateHtmlAndPdf(collectionOutput); res.send(htmlAndPdf); requestLogger.info("Finished serving request"); @@ -141,14 +155,14 @@ function handleShutdownSignal(signal: string) { export interface StartCollectionRequestBody { website_url: string; - max_option_input: number; - sleep_option_input: number; - timeout_input_option: number; - first_party_uri_option_input: string[]; - browse_link_option_input: string[]; - seed_option_input: string; - testssl_input_option: boolean; - cookie_input: Cookie[]; + max_additional_links: number; + post_page_load_delay_seconds: number; + timeout_seconds: number; + first_party_uris: string[]; + links_to_include: string[]; + link_selection_seed: string; + run_testSSL: boolean; + cookies: Cookie[]; } export interface Cookie { -- GitLab From 1797dc6ad12a8d533d35e473b2e15c9dba1e07f9 Mon Sep 17 00:00:00 2001 From: Felix 
Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 16:33:37 +0100 Subject: [PATCH 3/5] Add code to generate Report with DNT and DNT disabled output --- src/reporter/reporter.ts | 2 ++ src/server/runCollection.ts | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index 774f7cc..e0eed4f 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -88,6 +88,7 @@ export class Reporter { filename = "inspection.html", log = true, template = "../assets/template.pug", + extraData?, ) { const html_template = this.args["html-template"] || path.join(__dirname, template); @@ -106,6 +107,7 @@ export class Reporter { require.resolve("github-markdown-css/github-markdown.css"), ), filterOptions: { marked: {} }, + extra: extraData, }), ); diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index bde9122..83fdb9f 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,6 +3,7 @@ import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; +import { template } from "lodash"; export interface RunCollectionArguments { website_url: string; @@ -38,7 +39,7 @@ export async function runCollection( return inspector.run(); } -export async function generateHtmlAndPdf(inspectionOutput) { +export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) { let reporterArgs: ReporterArguments = { html: true, pdf: true, @@ -49,7 +50,13 @@ export async function generateHtmlAndPdf(inspectionOutput) { }; const reporter = new Reporter(reporterArgs); - let html = reporter.generateHtml(inspectionOutput, "inspection.html", false); + let html = reporter.generateHtml( + inspectionOutput, + "inspection.html", + false, + extraOuptut ? 
"path/to/alternative/template" : undefined, + extraOuptut, + ); let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); return { html: html, -- GitLab From eaf2142b55f699815c0d51abeb01e2ee420bcd0f Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 17:15:18 +0100 Subject: [PATCH 4/5] refactor: Add more types to Collector --- src/collector/index.ts | 10 +++++++--- src/collector/output.ts | 6 ++++++ src/commands/collectorCommand.ts | 4 ++-- src/inspector/inspector.ts | 3 ++- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/collector/index.ts b/src/collector/index.ts index 8c28850..ab23398 100644 --- a/src/collector/index.ts +++ b/src/collector/index.ts @@ -1,5 +1,9 @@ import { createOutputDirectory } from "./io.js"; -import { CreateOutputArgs, createOutputObject } from "./output.js"; +import { + CollectorOutput, + CreateOutputArgs, + createOutputObject, +} from "./output.js"; import { testSSL, testHttps } from "./connection.js"; import { @@ -17,13 +21,13 @@ import { Logger } from "winston"; import { PageSession } from "./page-session.js"; export interface CollectionResult { - output: any; + output: CollectorOutput; pageSession: PageSession; source: string; } export class Collector { - private output: any; + private output: CollectorOutput; private browserSession: BrowserSession; private pageSession: PageSession; private logger: Logger; diff --git a/src/collector/output.ts b/src/collector/output.ts index 9796429..705be3f 100644 --- a/src/collector/output.ts +++ b/src/collector/output.ts @@ -74,6 +74,12 @@ export interface CollectorOutput { websockets: any; start_time: Date; end_time: Date | null; + websocketLog?: any; + screenshots?: { + screenshot_top: string; + screenshot_bottom: string; + screenshot_full: string; + }; } export function createOutputObject(args: CreateOutputArgs): CollectorOutput { diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index 
4cee5ad..9989154 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -173,7 +173,7 @@ export default { handler: async (argv) => await runCollector(argv), }; -async function runCollector(args: ParsedArgsCollector): Promise<any> { +async function runCollector(args: CollectorCommandArguments): Promise<any> { const logger = create({}, args.output); const collector = new Collector(args, logger); @@ -215,7 +215,7 @@ async function runCollector(args: ParsedArgsCollector): Promise<any> { return inspectionResult; } -interface ParsedArgsCollector { +interface CollectorCommandArguments { _: (string | number)[]; command: string; max: number; diff --git a/src/inspector/inspector.ts b/src/inspector/inspector.ts index 6b6d87e..5f0b445 100644 --- a/src/inspector/inspector.ts +++ b/src/inspector/inspector.ts @@ -5,12 +5,13 @@ import { isFirstParty } from "../lib/tools.js"; import { PageSession } from "../collector/page-session.js"; import { CollectedCookie } from "../collector/recorder/cookie-recorder.js"; import { EnhancedCookie } from "../collector/collector_inspector.js"; +import { CollectorOutput } from "../collector/output.js"; class Inspector { private output: any; private pageSession: PageSession; - constructor(pageSession: PageSession, output: any) { + constructor(pageSession: PageSession, output: CollectorOutput) { this.output = output; this.pageSession = pageSession; } -- GitLab From f28f635dcee39466e724c8e2c728b539f285a149 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 17:15:18 +0100 Subject: [PATCH 5/5] refactor: Add more types to Collector --- src/server/runCollection.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 83fdb9f..fda49bd 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,7 +3,6 @@ import { Collector } from "../collector/index.js"; import Inspector from 
"../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; -import { template } from "lodash"; export interface RunCollectionArguments { website_url: string; -- GitLab