From 12d7889ae49b1bf81eb7f20e482e9bb1b77ecef4 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 16:33:37 +0100 Subject: [PATCH 01/11] Add code to generate Report with DNT and DNT disabled output --- src/server/runCollection.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index fda49bd..83fdb9f 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,6 +3,7 @@ import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; +import { template } from "lodash"; export interface RunCollectionArguments { website_url: string; -- GitLab From 9231e5b9d60b0b4c1347f6ce05051438c6549aec Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 17:15:18 +0100 Subject: [PATCH 02/11] refactor: Add more types to Collector --- src/server/runCollection.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 83fdb9f..fda49bd 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,7 +3,6 @@ import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; -import { template } from "lodash"; export interface RunCollectionArguments { website_url: string; -- GitLab From ff01cd84a42d7e748e3ca288699be15e52106c6c Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 16:51:24 +0100 Subject: [PATCH 03/11] refactor: reporter.ts and reporterCommand.ts --- src/commands/collectorCommand.ts | 29 +-- src/commands/reporterCommand.ts | 204 +++++--------------- src/reporter/reporter.ts | 321 ++++++++++++++++--------------- src/server/runCollection.ts | 10 +- 4 files changed, 231 
insertions(+), 333 deletions(-) diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index 9989154..1769bc5 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -9,7 +9,7 @@ import { create } from "../lib/logger.js"; import { CollectionResult, Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; -import { Reporter, ReporterArguments } from "../reporter/reporter.js"; +import { Reporter, ReporterOptions } from "../reporter/reporter.js"; let collectorCommand = "collect"; @@ -186,7 +186,7 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { const inspectionResult: any = inspector.run(); - let reporterArgs: ReporterArguments = { + let reporterArgs: ReporterOptions = { outputPath: args.output, json: args.json, yaml: args.yaml, @@ -197,20 +197,25 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { const reporter = new Reporter(reporterArgs); - reporter.saveJson( + reporter.saveJsonToFile( inspectionResult.websocketLog, "websockets-log.json", false, ); - reporter.saveJson(inspectionResult, "inspection.json"); - reporter.saveYaml(inspectionResult.cookies, "cookies.yml", false); - reporter.saveYaml(inspectionResult.localStorage, "local-storage.yml", false); - reporter.saveYaml(inspectionResult.beacons, "beacons.yml", false); - reporter.saveYaml(inspectionResult, "inspection.yml"); - reporter.generateHtml(inspectionResult); - await reporter.generateOfficeDoc(inspectionResult); - await reporter.convertHtmlToPdf(); - reporter.saveSource(collectionResult.source); + reporter.saveJsonToFile(inspectionResult, "inspection.json"); + reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml", false); + reporter.saveYamlToFile( + inspectionResult.localStorage, + "local-storage.yml", + false, + ); + reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml", false); + reporter.saveYamlToFile(inspectionResult, 
"inspection.yml"); + const htmlReport = reporter.generateHtmlReport(inspectionResult); + await reporter.saveAsOfficeDoc(inspectionResult); + const pdfReport = await reporter.convertHtmlToPdfInMemory(htmlReport); + reporter.saveFile("inspection.pdf", pdfReport); + reporter.saveFile("source.html", collectionResult.source); return inspectionResult; } diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index 3bf46d6..74203fb 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -10,20 +10,8 @@ import yaml from "js-yaml"; import fs from "fs"; import path from "path"; -import { marked } from "marked"; -import { markedSmartypants } from "marked-smartypants"; -import pug from "pug"; -import groupBy from "lodash/groupBy.js"; -import { spawnSync } from "node:child_process"; -import puppeteer from "puppeteer"; -import { fileURLToPath } from "url"; -import { createRequire } from "module"; import { all as unsafe } from "js-yaml-js-types-esm"; -import HTMLtoDOCX from "html-to-docx"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); -const require = createRequire(import.meta.url); +import { Reporter } from "../reporter/reporter.js"; yaml.DEFAULT_SCHEMA = yaml.DEFAULT_SCHEMA.extend(unsafe); @@ -87,173 +75,79 @@ export default { }; async function runReporter(args: ParsedArgsReporter) { - let output = JSON.parse(fs.readFileSync(args.inspectionJsonPath, "utf8")); - - let html_template = - args.htmlTemplate || path.join(__dirname, "../assets/template.pug"); - let office_template = - args.officeTemplate || - path.join(__dirname, "../assets/template-office.pug"); + const collectionData = JSON.parse( + fs.readFileSync(args.inspectionJsonPath, "utf8"), + ); - // it is surprising that https://github.com/jstransformers/jstransformer-marked picks up this object (undocumented API) - // source of this call: https://github.com/markedjs/marked-custom-heading-id/blob/main/src/index.js (MIT 
License, Copyright (c) 2021 @markedjs) - marked.use({ - renderer: { - heading(text, level, _) { - // WEC patch: add \: - const headingIdRegex = /(?: +|^)\{#([a-z][\:\w-]*)\}(?: +|$)/i; - const hasId = text.match(headingIdRegex); - if (!hasId) { - // fallback to original heading renderer - return false; - } - return `<h${level} id="${hasId[1]}">${text.replace(headingIdRegex, "")}</h${level}>\n`; - }, - }, + const reporter = new Reporter({ + htmlTemplate: args.htmlTemplate, + officeTemplate: args.officeTemplate, + usePandoc: args.usePandoc || false, + extraFiles: args.extraFiles, }); - marked.use(markedSmartypants()); - - const make_office = - args.outputFile && - (args.outputFile.endsWith(".docx") || args.outputFile.endsWith(".odt")); - const make_pdf = args.outputFile && args.outputFile.endsWith(".pdf"); - let html_dump = pug.renderFile( - make_office ? office_template : html_template, - Object.assign({}, output, { - pretty: true, - basedir: path.resolve(path.join(__dirname, "../assets")), // determines root director for pug - // expose some libraries to pug templates - groupBy: groupBy, - marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 - fs: fs, - yaml: yaml, - path: path, - inlineCSS: fs.readFileSync( - require.resolve("github-markdown-css/github-markdown.css"), - "utf8", - ), - inspection: output, - extra: args.extraFiles, - filterOptions: { marked: {} }, - }), - ); + const htmlReport = reporter.generateHtmlReport(collectionData); + // Print to console when no output file is specified if (!args.outputFile) { - console.log(html_dump); - return; - } - - if (make_office) { - await generateOfficeFile( - args.usePandoc, - html_dump, - output, - args.outputFile, - ); - return; - } - - if (make_pdf) { - await generatePdf(args.outputFile, html_dump); + console.log(htmlReport); return; } - fs.writeFileSync(path.join(args.outputFile), html_dump); -} + 
const fileExtension = path.extname(args.outputFile).toLowerCase(); + const outputDir = path.dirname(args.outputFile); -async function generateOfficeFile( - usePandoc: boolean, - html_dump: string, - output: any, - outputFile?: string, -) { - if (usePandoc) { - // console.warn("Using pandoc to generate", argv.outputFile); - // pandoc infers the output format from the output file name - let ret = spawnSync( - "pandoc", - ["-f", "html", "--number-sections", "--toc", "--output", outputFile], - { - // cwd: '.', - input: html_dump, - encoding: "utf8", - }, - ); - if (ret[2]) { - console.log(ret[2]); - } - return; - } - if (outputFile.endsWith(".odt")) { - console.error( - "To generate .odt, you must have pandoc installed and specify --use-pandoc.", - ); - process.exit(1); + // Create the output directory if it doesn't exist + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); } - // console.warn("Using NPM html-to-docx to generate", argv.outputFile); - const documentOptions = { - // decodeUnicode: true, - orientation: "portrait", - pageSize: { width: "21.0cm", height: "29.7cm" }, - pageNumber: true, - // lineNumber: true, - // lineNumberOptions: {countBy: 5}, - title: output.title, - lang: "en-UK", - creator: `EDPS Website Evidence Collector v${output.script.version.npm} using NPM html-to-docx`, - }; - - try { - let docx = await HTMLtoDOCX(html_dump, null, documentOptions, null); - fs.writeFileSync(path.join(outputFile), docx); - } catch (e) { - console.error(e); + switch (fileExtension) { + case ".pdf": + const htmlContent = reporter.generateHtmlReport( + collectionData, + undefined, + false, + ); + const pdfBuffer = await reporter.convertHtmlToPdfInMemory(htmlContent); + reporter.saveFile(args.outputFile, pdfBuffer); + break; + case ".docx": + case ".odt": + await reporter.saveAsOfficeDoc(collectionData, args.outputFile); + break; + case ".html": + const html = reporter.generateHtmlReport( + collectionData, + undefined, + false, + ); + 
reporter.saveFile(args.outputFile, html); + break; + default: + console.log(`File extension ${fileExtension} is not supported.`); + break; } } -async function generatePdf(outputFile: string, html_dump: string) { - const browser = await puppeteer.launch({}); - const pages = await browser.pages(); - await pages[0].setContent(html_dump); - await pages[0].pdf({ - path: path.resolve(path.join(outputFile)), - format: "A4", - printBackground: true, - displayHeaderFooter: true, - headerTemplate: ` - <div class="page-footer" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="bottom: 5px; text-align: center;"><span class="title"></span></div> - </div> - `, - footerTemplate: ` - <div class="page-header" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div> - </div> - `, - // this is needed to prevent content from being placed over the footer - margin: { top: "1.5cm", bottom: "1cm" }, - }); - await browser.close(); -} function transformArgsToObject(parsingResult: any): ParsedArgsReporter { return { _: parsingResult._ as string[], inspectionJsonPath: parsingResult._[1] as string, - outputFile: parsingResult["outputFile"] as string, - htmlTemplate: parsingResult["htmlTemplate"] as string | undefined, - officeTemplate: parsingResult["officeTemplate"] as string | undefined, - extraFiles: parsingResult["extraFile"] as string[] | undefined, - usePandoc: parsingResult["usePandoc"] as boolean | undefined, + outputFile: parsingResult["output-file"] as string, + htmlTemplate: parsingResult["html-template"] as string | undefined, + officeTemplate: parsingResult["office-template"] as string | undefined, + extraFiles: parsingResult["extra-file"] as any[] | undefined, + usePandoc: parsingResult["use-pandoc"] as boolean | undefined, }; } + interface ParsedArgsReporter { _: (string | number)[]; 
inspectionJsonPath: string; outputFile?: string; htmlTemplate?: string; officeTemplate?: string; - extraFiles?: string[]; + extraFiles?: any[]; usePandoc?: boolean; } diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index e0eed4f..92ba978 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -1,4 +1,3 @@ -// jshint esversion: 8 import fs from "fs"; import path from "path"; import pug from "pug"; @@ -12,6 +11,7 @@ import { markedSmartypants } from "marked-smartypants"; import { fileURLToPath } from "url"; import { createRequire } from "module"; +import { Logger } from "winston"; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -35,202 +35,203 @@ marked.use({ }); marked.use(markedSmartypants()); -export interface ReporterArguments { +export interface ReporterOptions { outputPath?: string; - json: boolean; - yaml: boolean; - html: boolean; - pdf: boolean; - usePandoc: boolean; - "html-template"?: string; + json?: boolean; + yaml?: boolean; + html?: boolean; + pdf?: boolean; + usePandoc?: boolean; + htmlTemplate?: string; + officeTemplate?: string; + extraFiles?: any[]; } export class Reporter { - constructor(private args: ReporterArguments) {} + private options: ReporterOptions; + private logger: Logger; - saveJson(data, filename, log = true) { - const json_dump = JSON.stringify(data, null, 2); - - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), json_dump); - } - - if (log && this.args.json) { - console.log(json_dump); - } - } - - saveYaml(data, filename, log = true) { - const yaml_dump = yaml.dump(data, { - noRefs: true, - replacer: function replacer(_, value) { - return value instanceof URL ? 
value.toString() : value; - }, - }); - - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), yaml_dump); - } - - if (log && this.args.yaml) { - console.log(yaml_dump); - } + constructor(options: ReporterOptions = {}, logger: Logger) { + this.options = options; + this.logger = logger; } - readYaml(filename) { - return yaml.load( - fs.readFileSync(path.join(this.args.outputPath, filename), "utf8"), - ); - } - - generateHtml( - data, - filename = "inspection.html", + /* + * Takes the output of the inspector and generates a HTML report. + */ + generateHtmlReport( + inspectionData: object, + outputFilename = "inspection.html", log = true, - template = "../assets/template.pug", - extraData?, + customTemplate?: string, ) { - const html_template = - this.args["html-template"] || path.join(__dirname, template); - - const html_dump = pug.renderFile( - html_template, - Object.assign({}, data, { - pretty: true, - basedir: path.join(__dirname, "../assets"), - groupBy: groupBy, - marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 - fs: fs, - yaml: yaml, - path: path, - inlineCSS: fs.readFileSync( - require.resolve("github-markdown-css/github-markdown.css"), - ), - filterOptions: { marked: {} }, - extra: extraData, - }), - ); + const templatePath = + this.options.htmlTemplate || + customTemplate || + path.join(__dirname, "../assets/template.pug"); - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), html_dump); - } + const templateData = fs.readFileSync(templatePath, "utf8"); + + const htmlReport = this.renderHtmlReport(inspectionData, templateData); - if (log && this.args.html) { - console.log(html_dump); + this.saveFile(outputFilename, htmlReport); + + if (log && this.options.html) { + this.logger.info(htmlReport); } - return html_dump; + return htmlReport; } - async convertHtmlToPdf( - 
htmlfilename = "inspection.html", - pdffilename = "inspection.pdf", - ) { - if (this.args.pdf && this.args.outputPath) { - let content = fs.readFileSync( - path.resolve(path.join(this.args.outputPath, htmlfilename)), + renderHtmlReport(inspectionData: object, template: string) { + const pugInputData = Object.assign({}, inspectionData, { + pretty: true, + basedir: path.join(__dirname, "../assets"), + groupBy: groupBy, + marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 + fs: fs, + yaml: yaml, + path: path, + inlineCSS: fs.readFileSync( + require.resolve("github-markdown-css/github-markdown.css"), "utf8", - ); - let pdfBuffer = await this.convertHtmlToPdfInMemory(content); - fs.writeFileSync( - path.resolve(path.join(this.args.outputPath, pdffilename)), - pdfBuffer, - ); - } + ), + inspection: inspectionData, + extra: this.options.extraFiles, + filterOptions: { marked: {} }, + }); + + return pug.render(template, pugInputData); } async convertHtmlToPdfInMemory(htmlContent: string): Promise<Uint8Array> { const browser = await puppeteer.launch({}); - const page = await browser.newPage(); - await page.setContent(htmlContent, { waitUntil: "networkidle0" }); - let pdfBuffer = await page.pdf({ + const pages = await browser.pages(); + await pages[0].setContent(htmlContent); + const pdfBuffer = await pages[0].pdf({ format: "A4", printBackground: true, displayHeaderFooter: true, headerTemplate: ` - <div style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="bottom: 5px; text-align: center;"><span class="title"></span></div> - </div>`, + <div class="page-footer" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> + <div style="bottom: 5px; text-align: center;"><span class="title"></span></div> + </div> + `, footerTemplate: ` - <div style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: 
relative;"> - <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div> - </div>`, + <div class="page-header" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> + <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div> + </div> + `, + // this is needed to prevent content from being placed over the footer margin: { top: "1.5cm", bottom: "1cm" }, }); await browser.close(); return pdfBuffer; } - async generateOfficeDoc( - data, - filename = "inspection.docx", - log = true, - template = "../assets/template-office.pug", + /* + * Generates either a .docx or .odt. + * For .docx a conversion with pandoc or with js is available + * For .odt only pandoc is available. + */ + async saveAsOfficeDoc( + inspectionData: object, + outputFilePath = "inspection.docx", + customTemplate?: string, ) { - if (this.args.outputPath) { - const office_template = - this.args["office-template"] || path.join(__dirname, template); - const html_dump = pug.renderFile( - office_template, - Object.assign({}, data, { - pretty: true, - basedir: path.join(__dirname, "../assets"), - jsondir: ".", // images in the folder of the inspection.json - groupBy: groupBy, - marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 - fs: fs, - yaml: yaml, - path: path, - inlineCSS: fs.readFileSync( - require.resolve("github-markdown-css/github-markdown.css"), - ), - filterOptions: { marked: {} }, - }), + const office_template = + this.options.officeTemplate || + customTemplate || + path.join(__dirname, "../assets/template-office.pug"); + + const templateData = fs.readFileSync(office_template, "utf-8"); + + const htmlReport = this.renderHtmlReport(inspectionData, templateData); + + if (this.options.usePandoc) { + await this.htmlToOfficeWithPandoc(htmlReport, 
outputFilePath); + return; + } + if (outputFilePath.endsWith(".odt")) { + this.logger.error( + "To generate .odt, you must have pandoc installed and specify --use-pandoc.", ); + process.exit(1); + } - if (this.args.usePandoc) { - const ret = spawnSync( - "pandoc", - ["-f", "html", "--number-sections", "--toc", "--output", filename], - { - cwd: this.args.outputPath, - input: html_dump, - encoding: "utf8", - }, - ); - if (ret[2]) { - console.log(ret[2]); - } - } else { - if (filename.endsWith(".odt")) { - console.error( - "To generate .odt, you must have pandoc installed and specify --use-pandoc.", - ); - process.exit(1); - } - - const documentOptions = { - orientation: "portrait", - pageSize: { width: "21.0cm", height: "29.7cm" }, - pageNumber: true, - title: data.title, - lang: "en-UK", - creator: `EDPS Website Evidence Collector v${data.script.version.npm} using NPM html-to-docx`, - }; - const fileBuffer = await HTMLtoDOCX( - html_dump, - null, - documentOptions, - null, - ); - fs.writeFileSync(path.join(this.args.outputPath, filename), fileBuffer); - } + await this.htmlToDocxJavascript(inspectionData, htmlReport, outputFilePath); + } + + private async htmlToOfficeWithPandoc(html_dump: string, outputFile: string) { + const ret = spawnSync( + "pandoc", + ["-f", "html", "--number-sections", "--toc", "--output", outputFile], + { + input: html_dump, + encoding: "utf8", + }, + ); + if (ret[2]) { + this.logger.info(ret[2]); } } - saveSource(source, filename = "source.html") { - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), source); + private async htmlToDocxJavascript( + inspectionData: any, + htmlReport: string, + outputFileName: string, + ) { + const documentOptions = { + orientation: "portrait", + pageSize: { width: "21.0cm", height: "29.7cm" }, + pageNumber: true, + title: inspectionData.title, + lang: "en-UK", + creator: `EDPS Website Evidence Collector v${inspectionData.script.version.npm} using NPM html-to-docx`, + }; + 
+ try { + let docx = await HTMLtoDOCX(htmlReport, null, documentOptions, null); + this.saveFile(outputFileName, docx); + } catch (e) { + this.logger.error(e); + } + } + + saveJsonToFile(data: any, filename: string, log = true) { + const json_dump = JSON.stringify(data, null, 2); + + this.saveFile(filename, json_dump); + + if (log && this.options.json) { + this.logger.info(json_dump); + } + } + + saveYamlToFile(data: any, filename: string, log = true) { + const yaml_dump = yaml.dump(data, { + noRefs: true, + replacer: function replacer(_, value) { + return value instanceof URL ? value.toString() : value; + }, + }); + + this.saveFile(filename, yaml_dump); + + if (log && this.options.yaml) { + this.logger.info(yaml_dump); + } + } + + loadYamlFile(filename: string) { + return yaml.load( + fs.readFileSync(path.join(this.options.outputPath, filename), "utf8"), + ); + } + + saveFile(filename: string, data: any) { + if (this.options.outputPath) { + fs.writeFileSync(path.join(this.options.outputPath, filename), data); } } } diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index fda49bd..2ee1297 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -1,4 +1,4 @@ -import { Reporter, ReporterArguments } from "../reporter/reporter.js"; +import { Reporter, ReporterOptions } from "../reporter/reporter.js"; import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; @@ -38,8 +38,8 @@ export async function runCollection( return inspector.run(); } -export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) { - let reporterArgs: ReporterArguments = { +export async function generateHtmlAndPdf(inspectionOutput: object) { + let reporterArgs: ReporterOptions = { html: true, pdf: true, json: false, @@ -49,12 +49,10 @@ export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) 
{ }; const reporter = new Reporter(reporterArgs); - let html = reporter.generateHtml( + let html = reporter.generateHtmlReport( inspectionOutput, "inspection.html", false, - extraOuptut ? "path/to/alternative/template" : undefined, - extraOuptut, ); let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); return { -- GitLab From db34a82332bfc2ba824e9061c16f14bbfed90e02 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 16:46:06 +0100 Subject: [PATCH 04/11] fix: properly call reporter with logger --- src/commands/collectorCommand.ts | 2 +- src/commands/reporterCommand.ts | 22 ++++++++++++++-------- src/server/runCollection.ts | 7 +++++-- src/server/server.ts | 5 ++++- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index 1769bc5..cfc98e3 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -195,7 +195,7 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { usePandoc: args.usePandoc, }; - const reporter = new Reporter(reporterArgs); + const reporter = new Reporter(reporterArgs, logger); reporter.saveJsonToFile( inspectionResult.websocketLog, diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index 74203fb..dedde67 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -12,6 +12,7 @@ import fs from "fs"; import path from "path"; import { all as unsafe } from "js-yaml-js-types-esm"; import { Reporter } from "../reporter/reporter.js"; +import { create } from "../lib/logger.js"; yaml.DEFAULT_SCHEMA = yaml.DEFAULT_SCHEMA.extend(unsafe); @@ -75,22 +76,27 @@ export default { }; async function runReporter(args: ParsedArgsReporter) { + const logger = create({}); + const collectionData = JSON.parse( fs.readFileSync(args.inspectionJsonPath, "utf8"), ); - const reporter = new Reporter({ - htmlTemplate: args.htmlTemplate, - 
officeTemplate: args.officeTemplate, - usePandoc: args.usePandoc || false, - extraFiles: args.extraFiles, - }); + const reporter = new Reporter( + { + htmlTemplate: args.htmlTemplate, + officeTemplate: args.officeTemplate, + usePandoc: args.usePandoc || false, + extraFiles: args.extraFiles, + }, + logger, + ); const htmlReport = reporter.generateHtmlReport(collectionData); // Print to console when no output file is specified if (!args.outputFile) { - console.log(htmlReport); + logger.info(htmlReport); return; } @@ -125,7 +131,7 @@ async function runReporter(args: ParsedArgsReporter) { reporter.saveFile(args.outputFile, html); break; default: - console.log(`File extension ${fileExtension} is not supported.`); + logger.warn(`File extension ${fileExtension} is not supported.`); break; } } diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 2ee1297..7e5a02b 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -38,7 +38,10 @@ export async function runCollection( return inspector.run(); } -export async function generateHtmlAndPdf(inspectionOutput: object) { +export async function generateHtmlAndPdf( + inspectionOutput: object, + logger: Logger, +) { let reporterArgs: ReporterOptions = { html: true, pdf: true, @@ -48,7 +51,7 @@ export async function generateHtmlAndPdf(inspectionOutput: object) { yaml: false, }; - const reporter = new Reporter(reporterArgs); + const reporter = new Reporter(reporterArgs, logger); let html = reporter.generateHtmlReport( inspectionOutput, "inspection.html", diff --git a/src/server/server.ts b/src/server/server.ts index 5dc11a2..6464404 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -121,7 +121,10 @@ function configureRoutes(browser_options: any[]): Router { requestLogger, ); - let htmlAndPdf = await generateHtmlAndPdf(collectionOutput); + let htmlAndPdf = await generateHtmlAndPdf( + collectionOutput, + requestLogger, + ); res.send(htmlAndPdf); requestLogger.info("Finished 
serving request"); } catch (e: any) { -- GitLab From 8c0ebda3d423dedafebcd1ad00189b73d2af574b Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:01:02 +0100 Subject: [PATCH 05/11] refactor: Use paths again for rendering pug files, as using the template as a loaded string requires specifying the filename anyway --- src/reporter/reporter.ts | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index 92ba978..85c30d7 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -70,9 +70,7 @@ export class Reporter { customTemplate || path.join(__dirname, "../assets/template.pug"); - const templateData = fs.readFileSync(templatePath, "utf8"); - - const htmlReport = this.renderHtmlReport(inspectionData, templateData); + const htmlReport = this.renderHtmlReport(inspectionData, templatePath); this.saveFile(outputFilename, htmlReport); @@ -83,10 +81,10 @@ export class Reporter { return htmlReport; } - renderHtmlReport(inspectionData: object, template: string) { + renderHtmlReport(inspectionData: object, templatePath: string) { const pugInputData = Object.assign({}, inspectionData, { pretty: true, - basedir: path.join(__dirname, "../assets"), + basedir: path.resolve(path.join(__dirname, "../assets")), groupBy: groupBy, marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 fs: fs, @@ -101,7 +99,7 @@ export class Reporter { filterOptions: { marked: {} }, }); - return pug.render(template, pugInputData); + return pug.renderFile(templatePath, pugInputData); } async convertHtmlToPdfInMemory(htmlContent: string): Promise<Uint8Array> { @@ -139,14 +137,15 @@ export class Reporter { outputFilePath = "inspection.docx", customTemplate?: string, ) { - const office_template = + const officeTemplatePath = 
this.options.officeTemplate || customTemplate || path.join(__dirname, "../assets/template-office.pug"); - const templateData = fs.readFileSync(office_template, "utf-8"); - - const htmlReport = this.renderHtmlReport(inspectionData, templateData); + const htmlReport = this.renderHtmlReport( + inspectionData, + officeTemplatePath, + ); if (this.options.usePandoc) { await this.htmlToOfficeWithPandoc(htmlReport, outputFilePath); -- GitLab From 46165cd0fa79c5d42ee68b3fc1a219be08868a9c Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:45:28 +0100 Subject: [PATCH 06/11] refactor: Have only ReporterOptions which decide what is printed to console during generation. --- src/commands/collectorCommand.ts | 23 ++++++++--------------- src/commands/reporterCommand.ts | 18 +++++++----------- src/reporter/reporter.ts | 25 ++++++++++++------------- src/server/runCollection.ts | 13 +++++-------- 4 files changed, 32 insertions(+), 47 deletions(-) diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index cfc98e3..5fe194c 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -188,28 +188,21 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { let reporterArgs: ReporterOptions = { outputPath: args.output, - json: args.json, - yaml: args.yaml, - html: args.html, + printJsonToConsole: args.json, + printYamlToConsole: args.yaml, + printHtmlToConsole: args.html, pdf: args.pdf, usePandoc: args.usePandoc, + extraFiles: [], }; const reporter = new Reporter(reporterArgs, logger); - reporter.saveJsonToFile( - inspectionResult.websocketLog, - "websockets-log.json", - false, - ); + reporter.saveJsonToFile(inspectionResult.websocketLog, "websockets-log.json"); reporter.saveJsonToFile(inspectionResult, "inspection.json"); - reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml", false); - reporter.saveYamlToFile( - inspectionResult.localStorage, - 
"local-storage.yml", - false, - ); - reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml", false); + reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml"); + reporter.saveYamlToFile(inspectionResult.localStorage, "local-storage.yml"); + reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml"); reporter.saveYamlToFile(inspectionResult, "inspection.yml"); const htmlReport = reporter.generateHtmlReport(inspectionResult); await reporter.saveAsOfficeDoc(inspectionResult); diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index dedde67..4a81558 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -88,6 +88,11 @@ async function runReporter(args: ParsedArgsReporter) { officeTemplate: args.officeTemplate, usePandoc: args.usePandoc || false, extraFiles: args.extraFiles, + printHtmlToConsole: !args.outputFile, + printJsonToConsole: false, + pdf: false, + printYamlToConsole: false, + outputPath: ".", }, logger, ); @@ -110,11 +115,7 @@ async function runReporter(args: ParsedArgsReporter) { switch (fileExtension) { case ".pdf": - const htmlContent = reporter.generateHtmlReport( - collectionData, - undefined, - false, - ); + const htmlContent = reporter.generateHtmlReport(collectionData); const pdfBuffer = await reporter.convertHtmlToPdfInMemory(htmlContent); reporter.saveFile(args.outputFile, pdfBuffer); break; @@ -123,12 +124,7 @@ async function runReporter(args: ParsedArgsReporter) { await reporter.saveAsOfficeDoc(collectionData, args.outputFile); break; case ".html": - const html = reporter.generateHtmlReport( - collectionData, - undefined, - false, - ); - reporter.saveFile(args.outputFile, html); + reporter.generateHtmlReport(collectionData, args.outputFile); break; default: logger.warn(`File extension ${fileExtension} is not supported.`); diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index 85c30d7..fcb452d 100644 --- a/src/reporter/reporter.ts +++ 
b/src/reporter/reporter.ts @@ -37,21 +37,21 @@ marked.use(markedSmartypants()); export interface ReporterOptions { outputPath?: string; - json?: boolean; - yaml?: boolean; - html?: boolean; - pdf?: boolean; - usePandoc?: boolean; + printJsonToConsole: boolean; + printYamlToConsole: boolean; + printHtmlToConsole: boolean; + pdf: boolean; + usePandoc: boolean; htmlTemplate?: string; officeTemplate?: string; - extraFiles?: any[]; + extraFiles: string[]; } export class Reporter { private options: ReporterOptions; private logger: Logger; - constructor(options: ReporterOptions = {}, logger: Logger) { + constructor(options: ReporterOptions, logger: Logger) { this.options = options; this.logger = logger; } @@ -62,7 +62,6 @@ export class Reporter { generateHtmlReport( inspectionData: object, outputFilename = "inspection.html", - log = true, customTemplate?: string, ) { const templatePath = @@ -74,7 +73,7 @@ export class Reporter { this.saveFile(outputFilename, htmlReport); - if (log && this.options.html) { + if (this.options.printHtmlToConsole) { this.logger.info(htmlReport); } @@ -197,17 +196,17 @@ export class Reporter { } } - saveJsonToFile(data: any, filename: string, log = true) { + saveJsonToFile(data: any, filename: string) { const json_dump = JSON.stringify(data, null, 2); this.saveFile(filename, json_dump); - if (log && this.options.json) { + if (this.options.printJsonToConsole) { this.logger.info(json_dump); } } - saveYamlToFile(data: any, filename: string, log = true) { + saveYamlToFile(data: any, filename: string) { const yaml_dump = yaml.dump(data, { noRefs: true, replacer: function replacer(_, value) { @@ -217,7 +216,7 @@ export class Reporter { this.saveFile(filename, yaml_dump); - if (log && this.options.yaml) { + if (this.options.printYamlToConsole) { this.logger.info(yaml_dump); } } diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 7e5a02b..406b359 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ 
-43,20 +43,17 @@ export async function generateHtmlAndPdf( logger: Logger, ) { let reporterArgs: ReporterOptions = { - html: true, + printHtmlToConsole: false, pdf: true, - json: false, + printJsonToConsole: false, outputPath: undefined, usePandoc: false, - yaml: false, + printYamlToConsole: false, + extraFiles: [], }; const reporter = new Reporter(reporterArgs, logger); - let html = reporter.generateHtmlReport( - inspectionOutput, - "inspection.html", - false, - ); + let html = reporter.generateHtmlReport(inspectionOutput, "inspection.html"); let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); return { html: html, -- GitLab From 9284452dc8021f1035b1edaf2150a6280231e29f Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:45:55 +0100 Subject: [PATCH 07/11] refactor: Remove superfluous call of generateHTML report --- src/commands/reporterCommand.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index 4a81558..a98dae1 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -97,14 +97,6 @@ async function runReporter(args: ParsedArgsReporter) { logger, ); - const htmlReport = reporter.generateHtmlReport(collectionData); - - // Print to console when no output file is specified - if (!args.outputFile) { - logger.info(htmlReport); - return; - } - const fileExtension = path.extname(args.outputFile).toLowerCase(); const outputDir = path.dirname(args.outputFile); -- GitLab From 3436325db86bccfefaf7bcad4de5ae4776701302 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:46:29 +0100 Subject: [PATCH 08/11] refactor: Improve typing of arguments for RunReporter function --- src/commands/reporterCommand.ts | 48 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/commands/reporterCommand.ts 
b/src/commands/reporterCommand.ts index a98dae1..dbf11cc 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -65,17 +65,37 @@ export default { .nargs("output-file", 1) .alias("output-file", "o") .string("output-file") - .check((argv: ParsedArgsReporter) => { + .check((argv: any) => { if (!argv._[1]) { return "Error: You must provide a file name or path"; } return true; }); }, - handler: async (argv: any) => await runReporter(transformArgsToObject(argv)), + handler: async (argv: any) => { + const runReporterArgs: RunReporterArgs = { + inspectionJsonPath: argv._[1], + htmlTemplate: argv.htmlTemplate, + officeTemplate: argv.officeTemplate, + usePandoc: argv.usePandoc, + extraFiles: argv.extraFile || [], + outputFile: argv.outputFile, + }; + + await runReporter(runReporterArgs); + }, +}; + +type RunReporterArgs = { + htmlTemplate?: string; + officeTemplate?: string; + usePandoc: boolean; + extraFiles: string[]; + outputFile?: string; + inspectionJsonPath: string; }; -async function runReporter(args: ParsedArgsReporter) { +async function runReporter(args: RunReporterArgs) { const logger = create({}); const collectionData = JSON.parse( @@ -123,25 +143,3 @@ async function runReporter(args: ParsedArgsReporter) { break; } } - -function transformArgsToObject(parsingResult: any): ParsedArgsReporter { - return { - _: parsingResult._ as string[], - inspectionJsonPath: parsingResult._[1] as string, - outputFile: parsingResult["output-file"] as string, - htmlTemplate: parsingResult["html-template"] as string | undefined, - officeTemplate: parsingResult["office-template"] as string | undefined, - extraFiles: parsingResult["extra-file"] as any[] | undefined, - usePandoc: parsingResult["use-pandoc"] as boolean | undefined, - }; -} - -interface ParsedArgsReporter { - _: (string | number)[]; - inspectionJsonPath: string; - outputFile?: string; - htmlTemplate?: string; - officeTemplate?: string; - extraFiles?: any[]; - usePandoc?: boolean; -} -- GitLab 
From 7a7433c792feb7a9ea8f24dc477c19c62f5611e5 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:46:46 +0100 Subject: [PATCH 09/11] refactor: add logging of filenames and change default log level --- src/lib/logger.ts | 5 +++-- src/reporter/reporter.ts | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lib/logger.ts b/src/lib/logger.ts index 4f8096b..d120b19 100644 --- a/src/lib/logger.ts +++ b/src/lib/logger.ts @@ -39,11 +39,12 @@ const create = ( options: CreateLoggerOptions, outputFilePath?: string, defaultMeta?: {}, + defaultLogLevel = "info", ): Logger => { const defaults: CreateLoggerOptions = { console: { silent: false, - level: "debug", + level: defaultLogLevel, stderrLevels: ["error", "debug", "info", "warn"], format: process.stdout.isTTY ? format.combine(format.colorize(), format.simple(), format.metadata()) @@ -51,7 +52,7 @@ const create = ( }, file: { enabled: true, - level: "silly", + level: defaultLogLevel, format: format.combine(format.json(), format.metadata()), }, }; diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index fcb452d..c0e081c 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -229,7 +229,9 @@ export class Reporter { saveFile(filename: string, data: any) { if (this.options.outputPath) { - fs.writeFileSync(path.join(this.options.outputPath, filename), data); + let effectivePath = path.join(this.options.outputPath, filename); + this.logger.debug(`Saving file to ${effectivePath}`); + fs.writeFileSync(effectivePath, data); } } } -- GitLab From b025f278f9a2c1a9dacd829544003bd53fea81c6 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Mon, 31 Mar 2025 16:45:50 +0200 Subject: [PATCH 10/11] Prepare DNT --- package-lock.json | 34 +- src/assets/template-dnt-comparison.pug | 946 +++++++++++++++++++++++++ src/reporter/reporter.ts | 2 +- src/server/runCollection.ts | 38 + 4 files changed, 1002 
insertions(+), 18 deletions(-) create mode 100644 src/assets/template-dnt-comparison.pug diff --git a/package-lock.json b/package-lock.json index 5be09fc..5a6bd71 100644 --- a/package-lock.json +++ b/package-lock.json @@ -283,26 +283,26 @@ } }, "node_modules/@babel/helpers": { - "version": "7.26.9", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.26.9.tgz", - "integrity": "sha512-Mz/4+y8udxBKdmzt/UjPACs4G3j5SshJJEFFKxlCGPydG4JAHXxjWjAwjd09tf6oINvl1VfMJo+nB7H2YKQ0dA==", + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.27.0.tgz", + "integrity": "sha512-U5eyP/CTFPuNE3qk+WZMxFkp/4zUzdceQlfzf7DdGdhp+Fezd7HD+i8Y24ZuTMKX3wQBld449jijbGq6OdGNQg==", "dev": true, "license": "MIT", "dependencies": { - "@babel/template": "^7.26.9", - "@babel/types": "^7.26.9" + "@babel/template": "^7.27.0", + "@babel/types": "^7.27.0" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/parser": { - "version": "7.26.9", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.9.tgz", - "integrity": "sha512-81NWa1njQblgZbQHxWHpxxCzNsa3ZwvFqpUg7P+NNUU6f3UU2jBEg4OlF/J6rl8+PQGh1q6/zWScd001YwcA5A==", + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.0.tgz", + "integrity": "sha512-iaepho73/2Pz7w2eMS0Q5f83+0RKI7i4xmiYeBmDzfRVbQtTOG7Ts0S4HzJVsTMGI9keU8rNfuZr8DKfSt7Yyg==", "license": "MIT", "dependencies": { - "@babel/types": "^7.26.9" + "@babel/types": "^7.27.0" }, "bin": { "parser": "bin/babel-parser.js" @@ -551,15 +551,15 @@ } }, "node_modules/@babel/template": { - "version": "7.26.9", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.26.9.tgz", - "integrity": "sha512-qyRplbeIpNZhmzOysF/wFMuP9sctmh2cFzRAZOn1YapxBsE1i9bJIY586R/WBLfLcmcBlM8ROBiQURnnNy+zfA==", + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.0.tgz", + "integrity": 
"sha512-2ncevenBqXI6qRMukPlXwHKHchC7RyMuu4xv5JBXRfOGVcTy1mXCD12qrp7Jsoxll1EV3+9sE4GugBVRjT2jFA==", "dev": true, "license": "MIT", "dependencies": { "@babel/code-frame": "^7.26.2", - "@babel/parser": "^7.26.9", - "@babel/types": "^7.26.9" + "@babel/parser": "^7.27.0", + "@babel/types": "^7.27.0" }, "engines": { "node": ">=6.9.0" @@ -610,9 +610,9 @@ "license": "MIT" }, "node_modules/@babel/types": { - "version": "7.26.9", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.9.tgz", - "integrity": "sha512-Y3IR1cRnOxOCDvMmNiym7XpXQ93iGDDPHx+Zj+NM+rg0fBaShfQLkg+hKPaZCEvg5N/LeCo4+Rj/i3FuJsIQaw==", + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.0.tgz", + "integrity": "sha512-H45s8fVLYjbhFH62dIJ3WtmJ6RSPt/3DRO0ZcT2SUiYiQyz3BLVb9ADEnLl91m74aQPS3AzzeajZHYOalWe3bg==", "license": "MIT", "dependencies": { "@babel/helper-string-parser": "^7.25.9", diff --git a/src/assets/template-dnt-comparison.pug b/src/assets/template-dnt-comparison.pug new file mode 100644 index 0000000..d2d8714 --- /dev/null +++ b/src/assets/template-dnt-comparison.pug @@ -0,0 +1,946 @@ +doctype html +html(xmlns="http://www.w3.org/1999/xhtml", xml:lang="en", lang="en") + head + meta(charset="utf-8") + meta(name="generator", content="website-evidence-collector") + meta( + name="viewport", + content="width=device-width, initial-scale=1.0, user-scalable=yes" + ) + title #{ title } (#{ uri_ins }) + base(target="_blank")/ + style(type="text/css") !{ inlineCSS } + style(type="text/css"). 
+ /* pandoc */ + code { + white-space: pre-wrap; + } + span.smallcaps { + font-variant: small-caps; + } + span.underline { + text-decoration: underline; + } + div.column { + display: inline-block; + vertical-align: top; + width: 50%; + } + + /* github */ + .markdown-body > article { + box-sizing: border-box; + min-width: 200px; + max-width: 980px; + margin: 0 auto; + padding: 45px; + } + + .markdown-body table { + width: inherit !important; + } + + @media (max-width: 767px) { + .markdown-body > article { + padding: 15px; + } + } + + /* custom */ + .markdown-body { + counter-reset: h1counter; + } + + h1:before { + content: counter(h1counter) "\0000a0\0000a0"; + counter-increment: h1counter; + } + h1 { + counter-reset: h2counter; + } + + h2:before { + content: counter(h1counter) "." counter(h2counter) "\0000a0\0000a0"; + counter-increment: h2counter; + } + h2 { + counter-reset: h3counter; + } + + h3:before { + content: counter(h1counter) "." counter(h2counter) "." counter(h3counter) "\0000a0\0000a0"; + counter-increment: h3counter; + } + + h1.nocount:before, + h2.nocount:before, + h3.nocount:before { + content: none; + counter-increment: none; + } + + /* annex */ + h1[id^="app"]:before { + content: none; + } + h1[id^="app"] { + counter-reset: h2counter; + } + + h2[id^="app"]:before { + content: counter(h2counter, upper-alpha) "\0000a0\0000a0"; + } + + #logo { + width: 40%; + float: right; + background-color: var(--color-canvas-default); + } + + .notrunc { + white-space: nowrap; + } + + .trunc { + white-space: nowrap; + text-overflow: ellipsis; + overflow: hidden; + max-width: 1px; + } + + .markdown-body table td.code { + padding: 0px; + } + + .markdown-body table td.code pre { + margin: 0px; + } + + td.highlighted, + td.highlighted pre, + li.highlighted a { + background-color: red; + color: white; + } + + @media print { + .markdown-body h1, + h2, + h3, + h4, + h5, + h6 { + break-after: avoid-page; + } + + .screen-only { + display: none; + } + } + + 
.unnumbered::before { + content: none !important; + counter-increment: none; + } + + body.markdown-body: article + header#title-block-header + #logo + include /wec_logo.svg + h1.title.unnumbered= title + h2.subtitle.unnumbered: a(href=uri_ins)= uri_ins + + h1(id="sec:evidence-collection-organisation") Evidence collection organisation + + table + colgroup + col(style="width: 50%") + col(style="width: 50%") + tbody + tr + td Target web service + td: code= uri_ins + tr + td Automated evidence collection start time + td= new Date(start_time).toLocaleString("en-GB") + tr + td Automated evidence collection end time + td= new Date(end_time).toLocaleString("en-GB") + tr + td Software version + td= script.version.commit || script.version.npm + + h1(id="sec:automated-evidence-collection") Automated evidence collection + + p The automated evidence collection is carried out using the tool #[a(href="https://edps.europa.eu/press-publications/edps-inspection-software_en") website evidence collector], WEC (also #[a(href="https://code.europa.eu/EDPS/website-evidence-collector") on Code Europa EU]) in version #{ script.version.commit || script.version.npm } on the platform #{ browser.platform.name } in version #{ browser.platform.version }. The tool employs the browser #{ browser.name } in version #{ browser.version } for browsing the website. + + p The evidence collection tool simulates a browsing session of the web service, capturing traffic between the browser and the Internet, along with any persistent data stored in the browser. While browsing, the tool gathers evidence and performs a number of checks. + + p It captures screenshots from the browser to identify potential cookie banners. It also tests HTTPS/SSL usage to determine whether the website enforces a secure connection. 
Then, the evidence collection tool scans the first web page for links to common social media and collaboration platforms, gathering data on the overall use of potentially privacy-intrusive third-party web services. + + p The recorded traffic between the browser, the target web service, and involved third-party web services, as well as the browser’s persistent storage, will be analysed in a #[span.citation(data-cites="sec:traffic-and-persistent-data-analysis"): a(href="#traffic-and-persistent-data-analysis") subsequent section]. + + p Generally, the tool browses a random subset of the target web service pages starting from the initial web page. However, the browsing can also include a set of predefined web pages. The exhaustive list of browsed web pages for this specific evidence collection is given in #[span.citation(data-cites="app:history"): a(href="#app:history") the Annex: Browsing history]. + + h2(id="sec:webpage-visit") Web page visit + + p + | On #{ new Date(start_time).toLocaleString("en-GB") }, the evidence collection tool navigated the browser to #[a(href=uri_ins)= uri_ins]. The final location after potential redirects was #[a(href=uri_dest)= uri_dest]. + if script.config.screenshots + | + | The evidence collection tool took two screenshots #[span.citation(data-cites="fig:screenshot-top") to cover the top of the web page] and #[span.citation(data-cites="fig:screenshot-bottom") the bottom]. 
+ + if script.config.screenshots + figure + img(id="fig:screenshot-top", + src=`data:image/png;base64,${screenshots.screenshot_top}`, + alt="Web page top screenshot", + style="width: 100%" + ) + figcaption Web page top screenshot + + figure + img(id="fig:screenshot-bottom", + src=`data:image/png;base64,${screenshots.screenshot_bottom}`, + alt="Web page bottom screenshot", + style="width: 100%" + ) + figcaption Web page bottom screenshot + + h2(id="sec:use-of-httpsssl") Use of HTTPS/SSL + + p HTTP (Hypertext Transfer Protocol) is a communication standard that transmits data between a website and a user’s browser in an unencrypted format, making it vulnerable to interception and eavesdropping. In contrast, HTTPS (Hypertext Transfer Protocol Secure) extends HTTP by adding an extra layer of security through encryption, which protects the confidentiality and integrity of the data exchanged between a website and a user’s browser. + + p The evidence collection tool assessed the behaviour of #{ host } with respect to the use of HTTPS. 
+ + table.use-of-httpsssl + colgroup + col(style="width: 50%") + col(style="width: 50%") + tbody + tr + td Allows connection with HTTPS + td= secure_connection.https_support + tr + td HTTP redirect to HTTPS + td= secure_connection.https_redirect + if secure_connection.redirects + tr + td HTTP redirect location + td: ul + each redirect in secure_connection.redirects + li + a(href=redirect)= redirect + if secure_connection.http_error + tr + td Error when connecting with HTTP + td= secure_connection.http_error + if secure_connection.https_error + tr + td Error when connecting with HTTPS + td= secure_connection.https_error + + if testSSL && testSSL.scanResult[0] + - var results = testSSL.scanResult[0]; + + - + sortSeverity = function(a,b) { + var severityToNumber = { + CRITICAL: 0, + HIGH: 1, + MEDIUM: 2, + LOW: 3, + OK: 4, + INFO: 5, + }; + + return severityToNumber[a.severity]-severityToNumber[b.severity]; + } + + p The software TestSSL from #[a(href="https://testssl.sh") https://testssl.sh] inspected the HTTPS configuration of the web service host #{ results.targetHost }. It classifies detected vulnerabilities by their level of severity #[em low], #[em medium], #[em high], or #[em critical]. The severity ratings are automatically computed by the TestSSL software without considering the security requirements of the individual website. They do not reflect the opinions or views of the website evidence collector's authors. Details of the findings are listed in #[span.citation(data-cites="app:testssl"): a(target="_parent", href="#app:testssl") the Annex: TestSSL scan]. + + table.testssl-summary + thead + tr + th HTTPS/SSL vulnerabilities per severity + th.notrunc Freq. + tbody + - var vulnerabilitiesBySeverity = groupBy(results.vulnerabilities, "severity"); + tr + td Critical + td.notrunc= vulnerabilitiesBySeverity["CRITICAL"] ? vulnerabilitiesBySeverity["CRITICAL"].length : 0 + tr + td High + td.notrunc= vulnerabilitiesBySeverity["HIGH"] ?
vulnerabilitiesBySeverity["HIGH"].length : 0 + tr + td Medium + td.notrunc= vulnerabilitiesBySeverity["MEDIUM"] ? vulnerabilitiesBySeverity["MEDIUM"].length : 0 + tr + td Low + td.notrunc= vulnerabilitiesBySeverity["LOW"] ? vulnerabilitiesBySeverity["LOW"].length : 0 + + h2(id="sec:use-of-csp") Use of content security policies (CSPs) + + p Upon a browser's request for a web page, websites can specify a whitelist of mechanisms, domains, and subdomains in the #[a(href="https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP") Content Security Policy] (CSP) metadata sent along with the requested page. Browsers must respect this whitelist when embedding components such as styles, fonts, beacons, videos, and maps. + + if hosts.contentSecurityPolicy.firstParty.length === 0 && hosts.contentSecurityPolicy.thirdParty.length === 0 + p No CSP metadata was found. Consequently, no restrictions apply. + + else + if hosts.contentSecurityPolicy.firstParty.length > 0 + ol + // check if host looks like a host (instead of e.g blob: data: etc.) + each host in hosts.contentSecurityPolicy.firstParty + if host.match(/[^\.]+\.[^\.]+/) + li: a(href=`http://${host}`)= host + else + li= host + + p The website has whitelisted #{ hosts.contentSecurityPolicy.firstParty.length } first-party domains and mechanisms. + else + p No CSP metadata related to first-party URLs was found. + + if hosts.contentSecurityPolicy.thirdParty.length > 0 + h4 Third-party content security policy hosts + + ol + each host in hosts.contentSecurityPolicy.thirdParty + li: a(href=`http://${host}`)= host + + p The website has whitelisted #{ hosts.contentSecurityPolicy.thirdParty.length } distinct third-party host(s). + else + p No third-party content security policy hosts were whitelisted. 
+ + h2(id="sec:use-of-social-media") Use of social media and collaboration platforms + + p The website evidence collection tool found links from #[a(href=uri_dest)= uri_dest] to the following common social media and collaboration platforms. + + if links.social.length > 0 + table.use-of-social-media-and-collaboration-platforms( + style="width: 100%" + ) + colgroup + col(style="width: 100%") + col(style="width: 100%") + thead + tr + th Link URL + th Link caption + tbody + each social in links.social + tr + td.trunc: a(href=social.href)= social.href + td.notrunc= social.inner_text + else + p No corresponding links were found. + + h2#traffic-and-persistent-data-analysis Traffic and persistent data analysis + + p First, the browser visited #[a(href=uri_dest)= uri_dest]. The evidence collection tool navigated to and collected evidence from #{ browsing_history.length > 1 ? browsing_history.length - 1 : "no" } additional web service page(s). + + p The web page(s) were browsed consecutively between #{ new Date(start_time).toLocaleString("en-GB") } and #{ new Date(end_time).toLocaleString("en-GB") }. + + p During the browsing, the HTTP Header #[a(href="https://en.wikipedia.org/wiki/Do_Not_Track") Do Not Track] was #{ browser.extra_headers.dnt ? 'set' : 'not set' }. + + p For the subsequent analysis, the following URLs (hosts with their paths) were defined as first-party: + + ol + each uri in uri_refs + li: a(href=uri)= uri.replace(/(^\w+:|^)\/\//, "") + + h3(id="sec:traffic-analysis") Traffic analysis + + p In the case of a visit to a very simple web page with a given URL (e.g. http://example.com/home.html), the browser sends a #[em request] to the web server configured for the domain specified in the URL (e.g. example.com). The web server, also called the #[em host], then sends a #[em response] in the form of, e.g. an HTML file (e.g. the home.html file), which the browser downloads and displays.
Most web pages nowadays are more complex and include content such as images, videos, and fonts, or embed elements like maps, tweets, and comments. To assemble and show the whole web page, the browser sends further requests to the same host (#[em first-party]) or even different hosts (potentially #[em third-party]) to download the required content. A web page is often composed of dozens of elements, and due to the complexity of website architecture, website administrators are often not fully aware of all third parties involved in the functioning of their websites. + + p The evidence collection tool extracted lists of distinct first- and third-party hosts from the browser requests recorded in each browsing session (with DNT signal set and without). These lists are presented below and aim to help by providing a comprehensive overview of all the hosts from which the browser requests elements. Note that subdomains (e.g. admin.example.com) of first-party domains (example.com) are, by default, considered third-party domains, whereas all URLs in the path (e.g. example.com/anysubpage) are treated as first-party by the automated evidence collection tool. More information about hosts and the distinction between first-party and third-party can be found in the glossary in #[span.citation(data-cites="app:glossary"): a(href="#app:glossary") the Annex: Glossary]. + + p A number of techniques allow hosts to track browsing behaviour. A first-party host may instruct the browser to send requests solely for the purpose of providing information embedded in the request (e.g. cookies) to a given first-party or third-party host. These requests are often responded to with an empty file or a 1x1 pixel image. Such files requested for tracking purposes are commonly referred to as #[em web beacons]. + + p The evidence collection tool compares all requests against signature lists compiled to detect potential web beacons or annoyances such as in-page pop-ups. 
Positive matches with the lists #[a(href="https://easylist.to/#easyprivacy") EasyPrivacy] (#[code easyprivacy.txt]) and #[a(href="https://easylist.to/#fanboy-s-annoyance-list") Fanboy’s Annoyance] (#[code fanboy-annoyance.txt]) from #[a(href="https://easylist.to") https://easylist.to] are presented in #[span.citation(data-cites="app:annex-beacons"): a(href="#app:annex-beacons") the Annex: All potential web beacons]. The list of #[em web beacon hosts] contains hosts of those requests that match the signature list EasyPrivacy. Note that the result may include false positives and may be incomplete due to inaccurate, outdated or incomplete signature lists. + + p #[em Cookies] are small text files stored on a user’s browser that allow websites to track and store information about the user’s interactions. However, they are limited in capacity and are transmitted with every HTTP request. #[em Local storage objects], on the other hand, offer a more modern method for websites to store larger amounts of data locally on a user’s browser, with better control over data access and expiration. Both cookies and local storage objects can be used for tracking purposes. + + p Eventually, the evidence collection tool logged all identified web forms that potentially transmit web form data using an unencrypted connection. + + h4 First-party hosts + + ol + each host in hosts.requests.firstParty + li: a(href=`http://${host}`)= host + + p Requests have been made to #{ hosts.requests.firstParty.length } distinct first-party host(s). + + h4 Third-party hosts + + ol + each host in hosts.requests.thirdParty + li: a(href=`http://${host}`)= host + + p Requests have been made to #{ hosts.requests.thirdParty.length } distinct third-party host(s). 
+ + h4 First-party potential web beacon hosts + + ol + each host in hosts.beacons.firstParty + li: a(href=`http://${host}`)= host + + if hosts.beacons.firstParty.length > 0 + p Potential first-party web beacons were sent to #{ hosts.beacons.firstParty.length } distinct host(s). Corresponding HTTP requests for first- and third-parties are listed in #[span.citation(data-cites="app:annex-beacons"): a(target="_parent", href="#app:annex-beacons") the Annex: All potential web beacons]. + else + p No first-party potential web beacons were found. + + h4 Third-party potential web beacon hosts + + ol + each host in hosts.beacons.thirdParty + li: a(href=`http://${host}`)= host + + if hosts.beacons.thirdParty.length > 0 + p Potential third-party web beacons were sent to #{ hosts.beacons.thirdParty.length } distinct host(s). Corresponding HTTP requests for first- and third-parties are listed in #[span.citation(data-cites="app:annex-beacons"): a(target="_parent", href="#app:annex-beacons") the Annex: All potential web beacons]. + else + p No third-party potential web beacons were found. + + h4(id="sec:unsecure-forms") Web forms with non-encrypted transmission + + if unsafeForms.length > 0 + table.unfase-webforms + colgroup + col + col + thead + tr + th # + th Web form ID + th Recipient URL + th HTTP method + tbody + each form, index in unsafeForms + tr + td= index + 1 + td= form.id + td= form.action + td= form.method + + p The evidence collection tool logged #{ unsafeForms.length } web forms that submit data potentially with no SSL encryption to a different web page. + else + p No web forms submitting data without SSL encryption were detected. + + h3(id="sec:persistent-data-analysis") Persistent data analysis + + p The evidence collection tool analysed cookies after the browsing session. Web pages can also use the persistent HTML5 #[em local storage]. 
#[span.citation(data-cites="sec:local-storage"): a(target="_parent", href="#sec:local-storage") The subsequent section] lists its content after the browsing. + + - var cookiesByStorage = groupBy(cookies, "firstPartyStorage"); + + each cookieList, index in {'first-party': cookiesByStorage['true'] || [], 'third-party': cookiesByStorage['false'] || []} + h4 Cookies linked to #{ index } hosts + + if cookieList.length > 0 + table.cookies(style="width: 100%") + colgroup + col(width="0%") + col(width="0%") + col(width="0%") + col(width="100%") + col(width="0%") + thead + tr + th.notrunc # + th.notrunc Host + th.notrunc Path + th.trunc Name + th.notrunc Expiry in days + tbody + each cookie, index in cookieList + tr + td.notrunc= index + 1 + td.notrunc: a( + href=`http://${cookie.domain}`, + title=`http://${cookie.domain}` + )= cookie.domain + td.notrunc: a( + href=`http://${cookie.domain}${cookie.path}`, + title=`http://${cookie.domain}${cookie.path}` + )= cookie.path + td.trunc(title=cookie.name)= cookie.name + td.notrunc + if cookie.session + em session + else + = cookie.expiresDays + + p In total, #{ cookieList.length } #{ index.toLowerCase() } cookie(s) were found. + else + p No #{ index.toLowerCase() } cookies were found. + + h4(id="sec:local-storage") Local storage + + if Object.keys(localStorage).length > 0 + table.local-storage(style="width: 100%") + colgroup + col(width="0%") + col(width="20%") + col(width="40%") + col(width="40%") + thead + tr + th.notrunc # + th.trunc Host + th.trunc Key + th.trunc Value + tbody + - let index = 1; + each storage, url in localStorage + each data, key in storage + tr + td.notrunc= index++ + td.trunc: a(href=url, title=url)= url.replace(/(^\w+:|^)\/\//, "") + td.trunc(title=key)= key + td.trunc.code: pre: code= JSON.stringify(data.value, null, 2) + else + p The local storage was found to be empty. 
+ + if extra && extra[0] && extra[0].browser && extra[0].browser.extra_headers && extra[0].browser.extra_headers.dnt + h2(id="traffic-and-persistent-data-analysis-dnt") Traffic and persistent data analysis (DNT set) + + p The same web page(s) were browsed a second time consecutively between #{ new Date(extra[0].start_time || start_time).toLocaleString("en-GB") } and #{ new Date(extra[0].end_time || end_time).toLocaleString("en-GB") }. + + p During this second browsing session, the HTTP Header #[a(href="https://en.wikipedia.org/wiki/Do_Not_Track") Do Not Track] was explicitly set. + + h3(id="sec:traffic-analysis-dnt") Traffic analysis (DNT set) + + h4 First-party hosts (DNT set) + + ol + each host in extra[0].hosts.requests.firstParty + li: a(href=`http://${host}`)= host + + p Requests with DNT set have been made to #{ extra[0].hosts.requests.firstParty.length } distinct first-party host(s). + + h4 Third-party hosts (DNT set) + + ol + each host in extra[0].hosts.requests.thirdParty + li: a(href=`http://${host}`)= host + + p Requests with DNT set have been made to #{ extra[0].hosts.requests.thirdParty.length } distinct third-party host(s). + + h4 First-party potential web beacon hosts (DNT set) + + ol + each host in extra[0].hosts.beacons.firstParty + li: a(href=`http://${host}`)= host + + if extra[0].hosts.beacons.firstParty.length > 0 + p Potential first-party web beacons with DNT set were sent to #{ extra[0].hosts.beacons.firstParty.length } distinct host(s). + else + p No first-party potential web beacons with DNT set were found. + + h4 Third-party potential web beacon hosts (DNT set) + + ol + each host in extra[0].hosts.beacons.thirdParty + li: a(href=`http://${host}`)= host + + if extra[0].hosts.beacons.thirdParty.length > 0 + p Potential third-party web beacons with DNT set were sent to #{ extra[0].hosts.beacons.thirdParty.length } distinct host(s). + else + p No third-party potential web beacons with DNT set were found. 
+ + h4(id="sec:unsecure-forms-dnt") Web forms with non-encrypted transmission (DNT set) + + if extra[0].unsafeForms && extra[0].unsafeForms.length > 0 + table.unfase-webforms + colgroup + col + col + thead + tr + th # + th Web form ID + th Recipient URL + th HTTP method + tbody + each form, index in extra[0].unsafeForms + tr + td= index + 1 + td= form.id + td= form.action + td= form.method + + p The evidence collection tool logged #{ extra[0].unsafeForms.length } web forms that submit data potentially with no SSL encryption to a different web page when DNT is set. + else + p No web forms submitting data without SSL encryption were detected when DNT is set. + + h3(id="sec:persistent-data-analysis-dnt") Persistent data analysis (DNT set) + + p The evidence collection tool analysed cookies and local storage after the DNT-enabled browsing session. + + - var cookiesByStorageDNT = groupBy(extra[0].cookies, "firstPartyStorage"); + + each cookieList, index in {'first-party': cookiesByStorageDNT['true'] || [], 'third-party': cookiesByStorageDNT['false'] || []} + h4 Cookies linked to #{ index } hosts (DNT set) + + if cookieList.length > 0 + table.cookies(style="width: 100%") + colgroup + col(width="0%") + col(width="0%") + col(width="0%") + col(width="100%") + col(width="0%") + thead + tr + th.notrunc # + th.notrunc Host + th.notrunc Path + th.trunc Name + th.notrunc Expiry in days + tbody + each cookie, index in cookieList + tr + td.notrunc= index + 1 + td.notrunc: a( + href=`http://${cookie.domain}`, + title=`http://${cookie.domain}` + )= cookie.domain + td.notrunc: a( + href=`http://${cookie.domain}${cookie.path}`, + title=`http://${cookie.domain}${cookie.path}` + )= cookie.path + td.trunc(title=cookie.name)= cookie.name + td.notrunc + if cookie.session + em session + else + = cookie.expiresDays + + p In total, #{ cookieList.length } #{ index.toLowerCase() } cookie(s) were found when DNT was set. + else + p No #{ index.toLowerCase() } cookies were found when DNT was set. 
+ + h4(id="sec:local-storage-dnt") Local storage (DNT set) + + if extra[0].localStorage && Object.keys(extra[0].localStorage).length > 0 + table.local-storage(style="width: 100%") + colgroup + col(width="0%") + col(width="20%") + col(width="40%") + col(width="40%") + thead + tr + th.notrunc # + th.trunc Host + th.trunc Key + th.trunc Value + tbody + - let index = 1; + each storage, url in extra[0].localStorage + each data, key in storage + tr + td.notrunc= index++ + td.trunc: a(href=url, title=url)= url.replace(/(^\w+:|^)\/\//, "") + td.trunc(title=key)= key + td.trunc.code: pre: code= JSON.stringify(data.value, null, 2) + else + p The local storage was found to be empty when DNT was set. + + h1(id="app:annex") Annex + + h2(id="app:history") Browsing history + + p For the collection of evidence, the browser navigated consecutively to the following #{ browsing_history.length } web page(s): + + ol + each link in browsing_history + li: a(href=link)= link + + h2(id="app:annex-beacons") All potential web beacons + + p The data transmitted by beacons using HTTP GET parameters are decoded for improved readability and displayed beneath the beacon URL. + + each beaconsByList, listName in groupBy(beacons, 'listName') + h5(id=`annex-beacons-${listName}`)= listName + + table.adblock-findings(style="width: 100%") + colgroup + col(width="0%") + col(width="100%") + col(width="0%") + thead + tr + th.notrunc # + th.trunc Sample URL + th.notrunc Freq. + tbody + each beacon, index in beaconsByList + tr + td.notrunc= index + 1 + td.trunc(title=beacon.url)= beacon.url + td.notrunc= beacon.occurrances + if beacon.query + tr + td.notrunc + td.trunc.code(colspan=2): pre: code= JSON.stringify(beacon.query, null, 2).split("\n").slice(1, -1).join("\n").replace(/^ /gm, "") + + if testSSL + - var results = testSSL.scanResult[0]; + + h2(id="app:testssl") TestSSL scan + + p The following data stems from a #[a(href="https://testssl.sh/") TestSSL] scan. 
The severity ratings are automatically computed by the TestSSL software without considering the security requirements of the individual website. They do not reflect the opinions or views of the website evidence collector's authors. + + p.screen-only #[a(href="testssl/testssl.html") Click here] to check whether the full TestSSL scan report is available. + + table(width="100%") + colgroup + col + col + tbody + tr + td TestSSL version + td= testSSL.version + tr + td OpenSSL version + td= testSSL.openssl + tr + td Target host + td #{ results.targetHost } (#{ results.ip }) + + h3.unnumbered Protocols + + table(width="100%") + colgroup + col(style="width: 0%") + col(style="width: 100%") + col(style="width: 0%") + thead + tr + th Protocol + th Finding + th Severity + tbody + each protocol in results.protocols.sort(sortSeverity) + tr + td.notrunc= protocol.id + td= protocol.finding + td.notrunc= protocol.severity + + h3.unnumbered HTTPS/SSL vulnerabilities + + table(width="100%") + colgroup + col(style="width: 0%") + col(style="width: 80%") + col(style="width: 20%") + col(style="width: 0%") + thead + tr + th Vulnerability + th Finding + th CVE + th Severity + tbody + each vulnerability in results.vulnerabilities.sort(sortSeverity) + tr + td.notrunc= vulnerability.id + td.trunc(title=vulnerability.finding)= vulnerability.finding + td.trunc.trunc(title=vulnerability.cve) + if vulnerability.cve + each cve in vulnerability.cve.split(' ') + a( + href=`https://cve.mitre.org/cgi-bin/cvename.cgi?name=${cve}` + )= cve + | + td.notrunc= vulnerability.severity + + h3.unnumbered Cipher categories + + table(style="width: 100%") + colgroup + col(style="width: 0%") + col(style="width: 100%") + col(style="width: 0%") + col(style="width: 0%") + thead + tr + th Name + th Finding + th CWE + th Severity + tbody + each cipher in results.ciphers.sort(sortSeverity) + tr + td.notrunc= cipher.id + td.notrunc= cipher.finding + td.notrunc + if cipher.cwe + a( + 
href=`https://cwe.mitre.org/cgi-bin/jumpmenu.cgi?id=${cipher.cwe.replace('CWE-','')}` + )= cipher.cwe + td.notrunc= cipher.severity + + h3.unnumbered HTTP header responses + + table(style="width: 100%") + colgroup + col(style="width: 0%") + col(style="width: 100%") + col(style="width: 0%") + thead + tr + th Name + th Finding + th Severity + tbody + each response in results.headerResponse.sort(sortSeverity) + tr + td.notrunc= response.id + td.trunc(title=response.finding)= response.finding + td.notrunc= response.severity + + h2(id="app:glossary") Glossary + dl + dt Do Not Track (DNT for short, HTTP) + dd The Do Not Track header is the proposed HTTP header field DNT, which requests that a web service does not track its individual visitors. Note that this request cannot be enforced by technical means on the visitors’ side. It is upon the web service to take the DNT header field into account. + dt Filter Lists + dd Browser extensions commonly referred to as #[em Adblockers] have been developed to block the loading of advertisements based on filter lists. Over time, these filter lists have been extended to also block the loading of web page elements associated with tracking web page visitors. For this evidence collection, publicly available tracking filter lists are used to identify web page elements that may track the web page visitors. + dt First-Party + dd In this document, #[em first-party] is a classification for resource links, web beacons, and cookies. To be considered first party, the resource’s domain must match the domain of the inspected web service or other configured first-party domains. Note that the resource path must also be within the path of the web service to be classified as first-party. + dt Host (HTTP) + dd The HTTP #[em host] is the computer that receives and responds to browser requests for web pages. + dt Local Storage (HTML5) + dd Most web browsers allow web pages to store data locally in the browser profile. 
This #[em local storage] is specific to the website and persists through browser shutdowns. As embedded third-party resources may also have access to first-party local storage, it is classified both as first- and third-party. + dt Redirect (HTTP) + dd A request for a web page may be answered with a new location (URL) to be requested instead. These HTTP #[em redirects] can be used to enforce the use of HTTPS. When visitors request an HTTP web page, they are redirected to the corresponding HTTPS web page. + dt Request (HTTP) + dd To download and display a web page identified by a URL, browsers send HTTP #[em requests] with the URL to the host computer specified as part of the URL. + dt Third-Party + dd Links, web beacons and cookies that are not #[em first-party] (see above) are classified as #[em third-party]. + dt Web Beacon + dd A web beacon is one of various techniques used on web pages to unobtrusively (usually invisibly) track web page visitors. A web beacon can be implemented as a 1x1 pixel image, a transparent image, or an empty file requested alongside other resources when a web page is loaded. + dt Web Beacon Host + dd The #[em host] in the URL of a #[em request] of a #[em web beacon] is referred to as the #[em web beacon host]. + + script. + // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat + // + // AnchorJS - v5.0.0 - 2023-01-18 + // https://www.bryanbraun.com/anchorjs/ + // Copyright (c) 2023 Bryan Braun; Licensed MIT + // + // @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat + !(function (A, e) { + "use strict"; + "function" == typeof define && define.amd ? define([], e) : "object" == typeof module && module.exports ? (module.exports = e()) : ((A.AnchorJS = e()), (A.anchors = new A.AnchorJS())); + })(globalThis, function () { + "use strict"; + return function (A) { + function u(A) { + (A.icon = Object.prototype.hasOwnProperty.call(A, "icon") ? 
A.icon : ""), (A.visible = Object.prototype.hasOwnProperty.call(A, "visible") ? A.visible : "hover"), (A.placement = Object.prototype.hasOwnProperty.call(A, "placement") ? A.placement : "right"), (A.ariaLabel = Object.prototype.hasOwnProperty.call(A, "ariaLabel") ? A.ariaLabel : "Anchor"), (A.class = Object.prototype.hasOwnProperty.call(A, "class") ? A.class : ""), (A.base = Object.prototype.hasOwnProperty.call(A, "base") ? A.base : ""), (A.truncate = Object.prototype.hasOwnProperty.call(A, "truncate") ? Math.floor(A.truncate) : 64), (A.titleText = Object.prototype.hasOwnProperty.call(A, "titleText") ? A.titleText : ""); + } + function d(A) { + var e; + if ("string" == typeof A || A instanceof String) e = [].slice.call(document.querySelectorAll(A)); + else { + if (!(Array.isArray(A) || A instanceof NodeList)) throw new TypeError("The selector provided to AnchorJS was invalid."); + e = [].slice.call(A); + } + return e; + } + (this.options = A || {}), + (this.elements = []), + u(this.options), + (this.add = function (A) { + var e, + t, + o, + i, + n, + s, + a, + r, + l, + c, + h, + p = []; + if ((u(this.options), 0 !== (e = d((A = A || "h2, h3, h4, h5, h6"))).length)) { + for ( + null === document.head.querySelector("style.anchorjs") && (((A = document.createElement("style")).className = "anchorjs"), A.appendChild(document.createTextNode("")), void 0 === (h = document.head.querySelector('[rel="stylesheet"],style')) ? 
document.head.appendChild(A) : document.head.insertBefore(A, h), A.sheet.insertRule(".anchorjs-link{opacity:0;text-decoration:none;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}", A.sheet.cssRules.length), A.sheet.insertRule(":hover>.anchorjs-link,.anchorjs-link:focus{opacity:1}", A.sheet.cssRules.length), A.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}", A.sheet.cssRules.length), A.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype")}', A.sheet.cssRules.length)), + h = document.querySelectorAll("[id]"), + t = [].map.call(h, function (A) { + return A.id; + }), 
+ i = 0; + i < e.length; + i++ + ) + if (this.hasAnchorJSLink(e[i])) p.push(i); + else { + if (e[i].hasAttribute("id")) o = e[i].getAttribute("id"); + else if (e[i].hasAttribute("data-anchor-id")) o = e[i].getAttribute("data-anchor-id"); + else { + for (r = a = this.urlify(e[i].textContent), s = 0; (n = t.indexOf((r = void 0 !== n ? a + "-" + s : r))), (s += 1), -1 !== n; ); + (n = void 0), t.push(r), e[i].setAttribute("id", r), (o = r); + } + ((l = document.createElement("a")).className = "anchorjs-link " + this.options.class), l.setAttribute("aria-label", this.options.ariaLabel), l.setAttribute("data-anchorjs-icon", this.options.icon), this.options.titleText && (l.title = this.options.titleText), (c = document.querySelector("base") ? window.location.pathname + window.location.search : ""), (c = this.options.base || c), (l.href = c + "#" + o), "always" === this.options.visible && (l.style.opacity = "1"), "" === this.options.icon && ((l.style.font = "1em/1 anchorjs-icons"), "left" === this.options.placement) && (l.style.lineHeight = "inherit"), "left" === this.options.placement ? 
((l.style.position = "absolute"), (l.style.marginLeft = "-1.25em"), (l.style.paddingRight = ".25em"), (l.style.paddingLeft = ".25em"), e[i].insertBefore(l, e[i].firstChild)) : ((l.style.marginLeft = ".1875em"), (l.style.paddingRight = ".1875em"), (l.style.paddingLeft = ".1875em"), e[i].appendChild(l)); + } + for (i = 0; i < p.length; i++) e.splice(p[i] - i, 1); + this.elements = this.elements.concat(e); + } + return this; + }), + (this.remove = function (A) { + for (var e, t, o = d(A), i = 0; i < o.length; i++) (t = o[i].querySelector(".anchorjs-link")) && (-1 !== (e = this.elements.indexOf(o[i])) && this.elements.splice(e, 1), o[i].removeChild(t)); + return this; + }), + (this.removeAll = function () { + this.remove(this.elements); + }), + (this.urlify = function (A) { + var e = document.createElement("textarea"); + return ( + (e.innerHTML = A), + (A = e.value), + this.options.truncate || u(this.options), + A.trim() + .replace(/'/gi, "") + .replace(/[& +$,:;=?@"#\u007B\u007D|^~[`%!'<>\]./()*\\\n\t\b\v\u00A0]/g, "-") + .replace(/-{2,}/g, "-") + .substring(0, this.options.truncate) + .replace(/^-+|-+$/gm, "") + .toLowerCase() + ); + }), + (this.hasAnchorJSLink = function (A) { + var e = A.firstChild && -1 < (" " + A.firstChild.className + " ").indexOf(" anchorjs-link "), + A = A.lastChild && -1 < (" " + A.lastChild.className + " ").indexOf(" anchorjs-link "); + return e || A || !1; + }); + }; + }); + // @license-end + + // Enable links for selected headers + var anchors = new AnchorJS(); + anchors.options = { + placement: "right", + /* visible: 'always', */ + /* icon: '¶', */ + }; + anchors.add(":not(header) > h1, :not(header) > h2, h3, h4, h5, h6"); + + script. 
+ function highlighter(item) { + item.addEventListener( + "dblclick", + function () { + item.classList.toggle("highlighted"); + }, + false, + ); + } + // highlighting for table cells + [].forEach.call(document.getElementsByTagName("td"), highlighter); + // highlighting for list items + [].forEach.call(document.getElementsByTagName("li"), highlighter); diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index c0e081c..f798b8a 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -44,7 +44,7 @@ export interface ReporterOptions { usePandoc: boolean; htmlTemplate?: string; officeTemplate?: string; - extraFiles: string[]; + extraFiles: any[]; } export class Reporter { diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 406b359..d80d39d 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,6 +3,11 @@ import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; +import path from "path"; +import { fileURLToPath } from "url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); export interface RunCollectionArguments { website_url: string; @@ -61,6 +66,39 @@ export async function generateHtmlAndPdf( }; } +export async function generateHtmlAndPdfForDntComparativeScan( + withoutDntInspectionOutput: object, + withDntInspectionOutput: object, + logger: Logger, +) { + const templatePath = path.join( + __dirname, + "../assets/template-dnt-comparison.pug", + ); + + let reporterArgs: ReporterOptions = { + printHtmlToConsole: false, + pdf: true, + printJsonToConsole: false, + outputPath: undefined, + usePandoc: false, + printYamlToConsole: false, + extraFiles: [withDntInspectionOutput], + }; + + const reporter = new Reporter(reporterArgs, logger); + let html = reporter.generateHtmlReport( + withoutDntInspectionOutput, + "inspection.html", + 
templatePath, + ); + let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); + return { + html: html, + pdf: Buffer.from(pdfBuffer.buffer).toString("base64"), + }; +} + /** * Constructs a JSON object containing all Arguments as it is expected by the underlying implementation. */ -- GitLab From 74327edf3071c101824837c70ad94fabf02e7c7a Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Wed, 2 Apr 2025 10:34:18 +0200 Subject: [PATCH 11/11] refactor: Merge the two functions for generating HTML and PDF reports into one. --- src/server/runCollection.ts | 55 ++++++++++++++++++------------------- src/server/server.ts | 5 +++- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index d80d39d..9d05195 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -43,9 +43,15 @@ export async function runCollection( return inspector.run(); } +/* + * Generates the report for the specified template and output as HTML and PDF. + * The PDF is encoded as Base64. + */ export async function generateHtmlAndPdf( - inspectionOutput: object, logger: Logger, + inspectionOutput: object, + templatePath: string, + additionalInspectionOutput?: [object], ) { let reporterArgs: ReporterOptions = { printHtmlToConsole: false, @@ -54,11 +60,16 @@ export async function generateHtmlAndPdf( outputPath: undefined, usePandoc: false, printYamlToConsole: false, - extraFiles: [], + extraFiles: additionalInspectionOutput ?? 
[], }; const reporter = new Reporter(reporterArgs, logger); - let html = reporter.generateHtmlReport(inspectionOutput, "inspection.html"); + let html = reporter.generateHtmlReport( + inspectionOutput, + "inspection.html", + templatePath, + ); + let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); return { html: html, @@ -66,36 +77,22 @@ export async function generateHtmlAndPdf( }; } -export async function generateHtmlAndPdfForDntComparativeScan( - withoutDntInspectionOutput: object, - withDntInspectionOutput: object, - logger: Logger, -) { - const templatePath = path.join( +/* + * The function returns the resolved paths for the available templates. + * This is necessary to enable other libraries to use the templates contained in the WEC as their paths have to be resolved. + */ +export function availableTemplates(): { + dntTemplate: string; + regularTemplate: string; +} { + const dntComparisonTemplate = path.join( __dirname, "../assets/template-dnt-comparison.pug", ); - - let reporterArgs: ReporterOptions = { - printHtmlToConsole: false, - pdf: true, - printJsonToConsole: false, - outputPath: undefined, - usePandoc: false, - printYamlToConsole: false, - extraFiles: [withDntInspectionOutput], - }; - - const reporter = new Reporter(reporterArgs, logger); - let html = reporter.generateHtmlReport( - withoutDntInspectionOutput, - "inspection.html", - templatePath, - ); - let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); + const regularTemplate = path.join(__dirname, "../assets/template.pug"); return { - html: html, - pdf: Buffer.from(pdfBuffer.buffer).toString("base64"), + dntTemplate: dntComparisonTemplate, + regularTemplate: regularTemplate, }; } diff --git a/src/server/server.ts b/src/server/server.ts index 6464404..4766eb8 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -7,6 +7,7 @@ import express, { } from "express"; import bodyParser from "body-parser"; import { + availableTemplates, generateHtmlAndPdf, runCollection, 
RunCollectionArguments, @@ -122,9 +123,11 @@ function configureRoutes(browser_options: any[]): Router { ); let htmlAndPdf = await generateHtmlAndPdf( - collectionOutput, requestLogger, + collectionOutput, + availableTemplates().regularTemplate, ); + res.send(htmlAndPdf); requestLogger.info("Finished serving request"); } catch (e: any) { -- GitLab