From 12d7889ae49b1bf81eb7f20e482e9bb1b77ecef4 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 16:33:37 +0100 Subject: [PATCH 1/9] Add code to generate Report with DNT and DNT disabled output --- src/server/runCollection.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index fda49bd..83fdb9f 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,6 +3,7 @@ import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; +import { template } from "lodash"; export interface RunCollectionArguments { website_url: string; -- GitLab From 9231e5b9d60b0b4c1347f6ce05051438c6549aec Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 17:15:18 +0100 Subject: [PATCH 2/9] refactor: Add more types to Collector --- src/server/runCollection.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 83fdb9f..fda49bd 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -3,7 +3,6 @@ import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; import { Logger } from "winston"; -import { template } from "lodash"; export interface RunCollectionArguments { website_url: string; -- GitLab From ff01cd84a42d7e748e3ca288699be15e52106c6c Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Tue, 11 Mar 2025 16:51:24 +0100 Subject: [PATCH 3/9] refactor: reporter.ts and reporterCommand.ts --- src/commands/collectorCommand.ts | 29 +-- src/commands/reporterCommand.ts | 204 +++++--------------- src/reporter/reporter.ts | 321 ++++++++++++++++--------------- src/server/runCollection.ts | 10 +- 4 files changed, 231 insertions(+), 333 deletions(-) diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index 9989154..1769bc5 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -9,7 +9,7 @@ import { create } from "../lib/logger.js"; import { CollectionResult, Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; -import { Reporter, ReporterArguments } from "../reporter/reporter.js"; +import { Reporter, ReporterOptions } from "../reporter/reporter.js"; let collectorCommand = "collect"; @@ -186,7 +186,7 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { const inspectionResult: any = inspector.run(); - let reporterArgs: ReporterArguments = { + let reporterArgs: ReporterOptions = { outputPath: args.output, json: args.json, yaml: args.yaml, @@ -197,20 +197,25 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { const reporter = new Reporter(reporterArgs); - reporter.saveJson( + reporter.saveJsonToFile( inspectionResult.websocketLog, "websockets-log.json", false, ); - reporter.saveJson(inspectionResult, "inspection.json"); - reporter.saveYaml(inspectionResult.cookies, "cookies.yml", false); - reporter.saveYaml(inspectionResult.localStorage, "local-storage.yml", false); - reporter.saveYaml(inspectionResult.beacons, "beacons.yml", false); - reporter.saveYaml(inspectionResult, "inspection.yml"); - reporter.generateHtml(inspectionResult); - await reporter.generateOfficeDoc(inspectionResult); - await reporter.convertHtmlToPdf(); - reporter.saveSource(collectionResult.source); + reporter.saveJsonToFile(inspectionResult, "inspection.json"); + reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml", false); + reporter.saveYamlToFile( + inspectionResult.localStorage, + "local-storage.yml", + false, + ); + reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml", false); + reporter.saveYamlToFile(inspectionResult, "inspection.yml"); + const htmlReport = reporter.generateHtmlReport(inspectionResult); + await reporter.saveAsOfficeDoc(inspectionResult); + const pdfReport = await reporter.convertHtmlToPdfInMemory(htmlReport); + reporter.saveFile("inspection.pdf", pdfReport); + reporter.saveFile("source.html", collectionResult.source); return inspectionResult; } diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index 3bf46d6..74203fb 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -10,20 +10,8 @@ import yaml from "js-yaml"; import fs from "fs"; import path from "path"; -import { marked } from "marked"; -import { markedSmartypants } from "marked-smartypants"; -import pug from "pug"; -import groupBy from "lodash/groupBy.js"; -import { spawnSync } from "node:child_process"; -import puppeteer from "puppeteer"; -import { fileURLToPath } from "url"; -import { createRequire } from "module"; import { all as unsafe } from "js-yaml-js-types-esm"; -import HTMLtoDOCX from "html-to-docx"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); -const require = createRequire(import.meta.url); +import { Reporter } from "../reporter/reporter.js"; yaml.DEFAULT_SCHEMA = yaml.DEFAULT_SCHEMA.extend(unsafe); @@ -87,173 +75,79 @@ export default { }; async function runReporter(args: ParsedArgsReporter) { - let output = JSON.parse(fs.readFileSync(args.inspectionJsonPath, "utf8")); - - let html_template = - args.htmlTemplate || path.join(__dirname, "../assets/template.pug"); - let office_template = - args.officeTemplate || - path.join(__dirname, "../assets/template-office.pug"); + const collectionData = JSON.parse( + fs.readFileSync(args.inspectionJsonPath, "utf8"), + ); - // it is surprising that https://github.com/jstransformers/jstransformer-marked picks up this object (undocumented API) - // source of this call: https://github.com/markedjs/marked-custom-heading-id/blob/main/src/index.js (MIT License, Copyright (c) 2021 @markedjs) - marked.use({ - renderer: { - heading(text, level, _) { - // WEC patch: add \: - const headingIdRegex = /(?: +|^)\{#([a-z][\:\w-]*)\}(?: +|$)/i; - const hasId = text.match(headingIdRegex); - if (!hasId) { - // fallback to original heading renderer - return false; - } - return `<h${level} id="${hasId[1]}">${text.replace(headingIdRegex, "")}</h${level}>\n`; - }, - }, + const reporter = new Reporter({ + htmlTemplate: args.htmlTemplate, + officeTemplate: args.officeTemplate, + usePandoc: args.usePandoc || false, + extraFiles: args.extraFiles, }); - marked.use(markedSmartypants()); - - const make_office = - args.outputFile && - (args.outputFile.endsWith(".docx") || args.outputFile.endsWith(".odt")); - const make_pdf = args.outputFile && args.outputFile.endsWith(".pdf"); - let html_dump = pug.renderFile( - make_office ? office_template : html_template, - Object.assign({}, output, { - pretty: true, - basedir: path.resolve(path.join(__dirname, "../assets")), // determines root director for pug - // expose some libraries to pug templates - groupBy: groupBy, - marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 - fs: fs, - yaml: yaml, - path: path, - inlineCSS: fs.readFileSync( - require.resolve("github-markdown-css/github-markdown.css"), - "utf8", - ), - inspection: output, - extra: args.extraFiles, - filterOptions: { marked: {} }, - }), - ); + const htmlReport = reporter.generateHtmlReport(collectionData); + // Print to console when no output file is specified if (!args.outputFile) { - console.log(html_dump); - return; - } - - if (make_office) { - await generateOfficeFile( - args.usePandoc, - html_dump, - output, - args.outputFile, - ); - return; - } - - if (make_pdf) { - await generatePdf(args.outputFile, html_dump); + console.log(htmlReport); return; } - fs.writeFileSync(path.join(args.outputFile), html_dump); -} + const fileExtension = path.extname(args.outputFile).toLowerCase(); + const outputDir = path.dirname(args.outputFile); -async function generateOfficeFile( - usePandoc: boolean, - html_dump: string, - output: any, - outputFile?: string, -) { - if (usePandoc) { - // console.warn("Using pandoc to generate", argv.outputFile); - // pandoc infers the output format from the output file name - let ret = spawnSync( - "pandoc", - ["-f", "html", "--number-sections", "--toc", "--output", outputFile], - { - // cwd: '.', - input: html_dump, - encoding: "utf8", - }, - ); - if (ret[2]) { - console.log(ret[2]); - } - return; - } - if (outputFile.endsWith(".odt")) { - console.error( - "To generate .odt, you must have pandoc installed and specify --use-pandoc.", - ); - process.exit(1); + // Create the output directory if it doesn't exist + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); } - // console.warn("Using NPM html-to-docx to generate", argv.outputFile); - const documentOptions = { - // decodeUnicode: true, - orientation: "portrait", - pageSize: { width: "21.0cm", height: "29.7cm" }, - pageNumber: true, - // lineNumber: true, - // lineNumberOptions: {countBy: 5}, - title: output.title, - lang: "en-UK", - creator: `EDPS Website Evidence Collector v${output.script.version.npm} using NPM html-to-docx`, - }; - - try { - let docx = await HTMLtoDOCX(html_dump, null, documentOptions, null); - fs.writeFileSync(path.join(outputFile), docx); - } catch (e) { - console.error(e); + switch (fileExtension) { + case ".pdf": + const htmlContent = reporter.generateHtmlReport( + collectionData, + undefined, + false, + ); + const pdfBuffer = await reporter.convertHtmlToPdfInMemory(htmlContent); + reporter.saveFile(args.outputFile, pdfBuffer); + break; + case ".docx": + case ".odt": + await reporter.saveAsOfficeDoc(collectionData, args.outputFile); + break; + case ".html": + const html = reporter.generateHtmlReport( + collectionData, + undefined, + false, + ); + reporter.saveFile(args.outputFile, html); + break; + default: + console.log(`File extension ${fileExtension} is not supported.`); + break; } } -async function generatePdf(outputFile: string, html_dump: string) { - const browser = await puppeteer.launch({}); - const pages = await browser.pages(); - await pages[0].setContent(html_dump); - await pages[0].pdf({ - path: path.resolve(path.join(outputFile)), - format: "A4", - printBackground: true, - displayHeaderFooter: true, - headerTemplate: ` - <div class="page-footer" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="bottom: 5px; text-align: center;"><span class="title"></span></div> - </div> - `, - footerTemplate: ` - <div class="page-header" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div> - </div> - `, - // this is needed to prevent content from being placed over the footer - margin: { top: "1.5cm", bottom: "1cm" }, - }); - await browser.close(); -} function transformArgsToObject(parsingResult: any): ParsedArgsReporter { return { _: parsingResult._ as string[], inspectionJsonPath: parsingResult._[1] as string, - outputFile: parsingResult["outputFile"] as string, - htmlTemplate: parsingResult["htmlTemplate"] as string | undefined, - officeTemplate: parsingResult["officeTemplate"] as string | undefined, - extraFiles: parsingResult["extraFile"] as string[] | undefined, - usePandoc: parsingResult["usePandoc"] as boolean | undefined, + outputFile: parsingResult["output-file"] as string, + htmlTemplate: parsingResult["html-template"] as string | undefined, + officeTemplate: parsingResult["office-template"] as string | undefined, + extraFiles: parsingResult["extra-file"] as any[] | undefined, + usePandoc: parsingResult["use-pandoc"] as boolean | undefined, }; } + interface ParsedArgsReporter { _: (string | number)[]; inspectionJsonPath: string; outputFile?: string; htmlTemplate?: string; officeTemplate?: string; - extraFiles?: string[]; + extraFiles?: any[]; usePandoc?: boolean; } diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index e0eed4f..92ba978 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -1,4 +1,3 @@ -// jshint esversion: 8 import fs from "fs"; import path from "path"; import pug from "pug"; @@ -12,6 +11,7 @@ import { markedSmartypants } from "marked-smartypants"; import { fileURLToPath } from "url"; import { createRequire } from "module"; +import { Logger } from "winston"; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -35,202 +35,203 @@ marked.use({ }); marked.use(markedSmartypants()); -export interface ReporterArguments { +export interface ReporterOptions { outputPath?: string; - json: boolean; - yaml: boolean; - html: boolean; - pdf: boolean; - usePandoc: boolean; - "html-template"?: string; + json?: boolean; + yaml?: boolean; + html?: boolean; + pdf?: boolean; + usePandoc?: boolean; + htmlTemplate?: string; + officeTemplate?: string; + extraFiles?: any[]; } export class Reporter { - constructor(private args: ReporterArguments) {} + private options: ReporterOptions; + private logger: Logger; - saveJson(data, filename, log = true) { - const json_dump = JSON.stringify(data, null, 2); - - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), json_dump); - } - - if (log && this.args.json) { - console.log(json_dump); - } - } - - saveYaml(data, filename, log = true) { - const yaml_dump = yaml.dump(data, { - noRefs: true, - replacer: function replacer(_, value) { - return value instanceof URL ? value.toString() : value; - }, - }); - - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), yaml_dump); - } - - if (log && this.args.yaml) { - console.log(yaml_dump); - } + constructor(options: ReporterOptions = {}, logger: Logger) { + this.options = options; + this.logger = logger; } - readYaml(filename) { - return yaml.load( - fs.readFileSync(path.join(this.args.outputPath, filename), "utf8"), - ); - } - - generateHtml( - data, - filename = "inspection.html", + /* + * Takes the output of the inspector and generates a HTML report. + */ + generateHtmlReport( + inspectionData: object, + outputFilename = "inspection.html", log = true, - template = "../assets/template.pug", - extraData?, + customTemplate?: string, ) { - const html_template = - this.args["html-template"] || path.join(__dirname, template); - - const html_dump = pug.renderFile( - html_template, - Object.assign({}, data, { - pretty: true, - basedir: path.join(__dirname, "../assets"), - groupBy: groupBy, - marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 - fs: fs, - yaml: yaml, - path: path, - inlineCSS: fs.readFileSync( - require.resolve("github-markdown-css/github-markdown.css"), - ), - filterOptions: { marked: {} }, - extra: extraData, - }), - ); + const templatePath = + this.options.htmlTemplate || + customTemplate || + path.join(__dirname, "../assets/template.pug"); - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), html_dump); - } + const templateData = fs.readFileSync(templatePath, "utf8"); + + const htmlReport = this.renderHtmlReport(inspectionData, templateData); - if (log && this.args.html) { - console.log(html_dump); + this.saveFile(outputFilename, htmlReport); + + if (log && this.options.html) { + this.logger.info(htmlReport); } - return html_dump; + return htmlReport; } - async convertHtmlToPdf( - htmlfilename = "inspection.html", - pdffilename = "inspection.pdf", - ) { - if (this.args.pdf && this.args.outputPath) { - let content = fs.readFileSync( - path.resolve(path.join(this.args.outputPath, htmlfilename)), + renderHtmlReport(inspectionData: object, template: string) { + const pugInputData = Object.assign({}, inspectionData, { + pretty: true, + basedir: path.join(__dirname, "../assets"), + groupBy: groupBy, + marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 + fs: fs, + yaml: yaml, + path: path, + inlineCSS: fs.readFileSync( + require.resolve("github-markdown-css/github-markdown.css"), "utf8", - ); - let pdfBuffer = await this.convertHtmlToPdfInMemory(content); - fs.writeFileSync( - path.resolve(path.join(this.args.outputPath, pdffilename)), - pdfBuffer, - ); - } + ), + inspection: inspectionData, + extra: this.options.extraFiles, + filterOptions: { marked: {} }, + }); + + return pug.render(template, pugInputData); } async convertHtmlToPdfInMemory(htmlContent: string): Promise<Uint8Array> { const browser = await puppeteer.launch({}); - const page = await browser.newPage(); - await page.setContent(htmlContent, { waitUntil: "networkidle0" }); - let pdfBuffer = await page.pdf({ + const pages = await browser.pages(); + await pages[0].setContent(htmlContent); + const pdfBuffer = await pages[0].pdf({ format: "A4", printBackground: true, displayHeaderFooter: true, headerTemplate: ` - <div style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="bottom: 5px; text-align: center;"><span class="title"></span></div> - </div>`, + <div class="page-footer" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> + <div style="bottom: 5px; text-align: center;"><span class="title"></span></div> + </div> + `, footerTemplate: ` - <div style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> - <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div> - </div>`, + <div class="page-header" style="width: 100%; font-size: 11px; padding: 5px 5px 0; position: relative;"> + <div style="top: 5px; text-align: center;"><span class="pageNumber"></span>/<span class="totalPages"></span></div> + </div> + `, + // this is needed to prevent content from being placed over the footer margin: { top: "1.5cm", bottom: "1cm" }, }); await browser.close(); return pdfBuffer; } - async generateOfficeDoc( - data, - filename = "inspection.docx", - log = true, - template = "../assets/template-office.pug", + /* + * Generates either a .docx or .odt. + * For .docx a conversion with pandoc or with js is available + * For .odt only pandoc is available. + */ + async saveAsOfficeDoc( + inspectionData: object, + outputFilePath = "inspection.docx", + customTemplate?: string, ) { - if (this.args.outputPath) { - const office_template = - this.args["office-template"] || path.join(__dirname, template); - const html_dump = pug.renderFile( - office_template, - Object.assign({}, data, { - pretty: true, - basedir: path.join(__dirname, "../assets"), - jsondir: ".", // images in the folder of the inspection.json - groupBy: groupBy, - marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 - fs: fs, - yaml: yaml, - path: path, - inlineCSS: fs.readFileSync( - require.resolve("github-markdown-css/github-markdown.css"), - ), - filterOptions: { marked: {} }, - }), + const office_template = + this.options.officeTemplate || + customTemplate || + path.join(__dirname, "../assets/template-office.pug"); + + const templateData = fs.readFileSync(office_template, "utf-8"); + + const htmlReport = this.renderHtmlReport(inspectionData, templateData); + + if (this.options.usePandoc) { + await this.htmlToOfficeWithPandoc(htmlReport, outputFilePath); + return; + } + if (outputFilePath.endsWith(".odt")) { + this.logger.error( + "To generate .odt, you must have pandoc installed and specify --use-pandoc.", ); + process.exit(1); + } - if (this.args.usePandoc) { - const ret = spawnSync( - "pandoc", - ["-f", "html", "--number-sections", "--toc", "--output", filename], - { - cwd: this.args.outputPath, - input: html_dump, - encoding: "utf8", - }, - ); - if (ret[2]) { - console.log(ret[2]); - } - } else { - if (filename.endsWith(".odt")) { - console.error( - "To generate .odt, you must have pandoc installed and specify --use-pandoc.", - ); - process.exit(1); - } - - const documentOptions = { - orientation: "portrait", - pageSize: { width: "21.0cm", height: "29.7cm" }, - pageNumber: true, - title: data.title, - lang: "en-UK", - creator: `EDPS Website Evidence Collector v${data.script.version.npm} using NPM html-to-docx`, - }; - const fileBuffer = await HTMLtoDOCX( - html_dump, - null, - documentOptions, - null, - ); - fs.writeFileSync(path.join(this.args.outputPath, filename), fileBuffer); - } + await this.htmlToDocxJavascript(inspectionData, htmlReport, outputFilePath); + } + + private async htmlToOfficeWithPandoc(html_dump: string, outputFile: string) { + const ret = spawnSync( + "pandoc", + ["-f", "html", "--number-sections", "--toc", "--output", outputFile], + { + input: html_dump, + encoding: "utf8", + }, + ); + if (ret[2]) { + this.logger.info(ret[2]); } } - saveSource(source, filename = "source.html") { - if (this.args.outputPath) { - fs.writeFileSync(path.join(this.args.outputPath, filename), source); + private async htmlToDocxJavascript( + inspectionData: any, + htmlReport: string, + outputFileName: string, + ) { + const documentOptions = { + orientation: "portrait", + pageSize: { width: "21.0cm", height: "29.7cm" }, + pageNumber: true, + title: inspectionData.title, + lang: "en-UK", + creator: `EDPS Website Evidence Collector v${inspectionData.script.version.npm} using NPM html-to-docx`, + }; + + try { + let docx = await HTMLtoDOCX(htmlReport, null, documentOptions, null); + this.saveFile(outputFileName, docx); + } catch (e) { + this.logger.error(e); + } + } + + saveJsonToFile(data: any, filename: string, log = true) { + const json_dump = JSON.stringify(data, null, 2); + + this.saveFile(filename, json_dump); + + if (log && this.options.json) { + this.logger.info(json_dump); + } + } + + saveYamlToFile(data: any, filename: string, log = true) { + const yaml_dump = yaml.dump(data, { + noRefs: true, + replacer: function replacer(_, value) { + return value instanceof URL ? value.toString() : value; + }, + }); + + this.saveFile(filename, yaml_dump); + + if (log && this.options.yaml) { + this.logger.info(yaml_dump); + } + } + + loadYamlFile(filename: string) { + return yaml.load( + fs.readFileSync(path.join(this.options.outputPath, filename), "utf8"), + ); + } + + saveFile(filename: string, data: any) { + if (this.options.outputPath) { + fs.writeFileSync(path.join(this.options.outputPath, filename), data); } } } diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index fda49bd..2ee1297 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -1,4 +1,4 @@ -import { Reporter, ReporterArguments } from "../reporter/reporter.js"; +import { Reporter, ReporterOptions } from "../reporter/reporter.js"; import { Collector } from "../collector/index.js"; import Inspector from "../inspector/inspector.js"; import { Cookie } from "./server.js"; @@ -38,8 +38,8 @@ export async function runCollection( return inspector.run(); } -export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) { - let reporterArgs: ReporterArguments = { +export async function generateHtmlAndPdf(inspectionOutput: object) { + let reporterArgs: ReporterOptions = { html: true, pdf: true, json: false, @@ -49,12 +49,10 @@ export async function generateHtmlAndPdf(inspectionOutput, extraOuptut?) { }; const reporter = new Reporter(reporterArgs); - let html = reporter.generateHtml( + let html = reporter.generateHtmlReport( inspectionOutput, "inspection.html", false, - extraOuptut ? "path/to/alternative/template" : undefined, - extraOuptut, ); let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); return { -- GitLab From db34a82332bfc2ba824e9061c16f14bbfed90e02 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 16:46:06 +0100 Subject: [PATCH 4/9] fix: properly call reporter with logger --- src/commands/collectorCommand.ts | 2 +- src/commands/reporterCommand.ts | 22 ++++++++++++++-------- src/server/runCollection.ts | 7 +++++-- src/server/server.ts | 5 ++++- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index 1769bc5..cfc98e3 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -195,7 +195,7 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { usePandoc: args.usePandoc, }; - const reporter = new Reporter(reporterArgs); + const reporter = new Reporter(reporterArgs, logger); reporter.saveJsonToFile( inspectionResult.websocketLog, diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index 74203fb..dedde67 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -12,6 +12,7 @@ import fs from "fs"; import path from "path"; import { all as unsafe } from "js-yaml-js-types-esm"; import { Reporter } from "../reporter/reporter.js"; +import { create } from "../lib/logger.js"; yaml.DEFAULT_SCHEMA = yaml.DEFAULT_SCHEMA.extend(unsafe); @@ -75,22 +76,27 @@ export default { }; async function runReporter(args: ParsedArgsReporter) { + const logger = create({}); + const collectionData = JSON.parse( fs.readFileSync(args.inspectionJsonPath, "utf8"), ); - const reporter = new Reporter({ - htmlTemplate: args.htmlTemplate, - officeTemplate: args.officeTemplate, - usePandoc: args.usePandoc || false, - extraFiles: args.extraFiles, - }); + const reporter = new Reporter( + { + htmlTemplate: args.htmlTemplate, + officeTemplate: args.officeTemplate, + usePandoc: args.usePandoc || false, + extraFiles: args.extraFiles, + }, + logger, + ); const htmlReport = reporter.generateHtmlReport(collectionData); // Print to console when no output file is specified if (!args.outputFile) { - console.log(htmlReport); + logger.info(htmlReport); return; } @@ -125,7 +131,7 @@ async function runReporter(args: ParsedArgsReporter) { reporter.saveFile(args.outputFile, html); break; default: - console.log(`File extension ${fileExtension} is not supported.`); + logger.warn(`File extension ${fileExtension} is not supported.`); break; } } diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 2ee1297..7e5a02b 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -38,7 +38,10 @@ export async function runCollection( return inspector.run(); } -export async function generateHtmlAndPdf(inspectionOutput: object) { +export async function generateHtmlAndPdf( + inspectionOutput: object, + logger: Logger, +) { let reporterArgs: ReporterOptions = { html: true, pdf: true, @@ -48,7 +51,7 @@ export async function generateHtmlAndPdf(inspectionOutput: object) { yaml: false, }; - const reporter = new Reporter(reporterArgs); + const reporter = new Reporter(reporterArgs, logger); let html = reporter.generateHtmlReport( inspectionOutput, "inspection.html", diff --git a/src/server/server.ts b/src/server/server.ts index 5dc11a2..6464404 100644 --- a/src/server/server.ts +++ b/src/server/server.ts @@ -121,7 +121,10 @@ function configureRoutes(browser_options: any[]): Router { requestLogger, ); - let htmlAndPdf = await generateHtmlAndPdf(collectionOutput); + let htmlAndPdf = await generateHtmlAndPdf( + collectionOutput, + requestLogger, + ); res.send(htmlAndPdf); requestLogger.info("Finished serving request"); } catch (e: any) { -- GitLab From 8c0ebda3d423dedafebcd1ad00189b73d2af574b Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:01:02 +0100 Subject: [PATCH 5/9] refactor: Use paths again for rendering pug files, as using the template as a loaded string requires specifying the filename anyway --- src/reporter/reporter.ts | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index 92ba978..85c30d7 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -70,9 +70,7 @@ export class Reporter { customTemplate || path.join(__dirname, "../assets/template.pug"); - const templateData = fs.readFileSync(templatePath, "utf8"); - - const htmlReport = this.renderHtmlReport(inspectionData, templateData); + const htmlReport = this.renderHtmlReport(inspectionData, templatePath); this.saveFile(outputFilename, htmlReport); @@ -83,10 +81,10 @@ export class Reporter { return htmlReport; } - renderHtmlReport(inspectionData: object, template: string) { + renderHtmlReport(inspectionData: object, templatePath: string) { const pugInputData = Object.assign({}, inspectionData, { pretty: true, - basedir: path.join(__dirname, "../assets"), + basedir: path.resolve(path.join(__dirname, "../assets")), groupBy: groupBy, marked: marked, // we need to pass the markdown engine to template for access at render-time (as opposed to comile time), see https://github.com/pugjs/pug/issues/1171 fs: fs, @@ -101,7 +99,7 @@ export class Reporter { filterOptions: { marked: {} }, }); - return pug.render(template, pugInputData); + return pug.renderFile(templatePath, pugInputData); } async convertHtmlToPdfInMemory(htmlContent: string): Promise<Uint8Array> { @@ -139,14 +137,15 @@ export class Reporter { outputFilePath = "inspection.docx", customTemplate?: string, ) { - const office_template = + const officeTemplatePath = this.options.officeTemplate || customTemplate || path.join(__dirname, "../assets/template-office.pug"); - const templateData = fs.readFileSync(office_template, "utf-8"); - - const htmlReport = this.renderHtmlReport(inspectionData, templateData); + const htmlReport = this.renderHtmlReport( + inspectionData, + officeTemplatePath, + ); if (this.options.usePandoc) { await this.htmlToOfficeWithPandoc(htmlReport, outputFilePath); -- GitLab From 46165cd0fa79c5d42ee68b3fc1a219be08868a9c Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:45:28 +0100 Subject: [PATCH 6/9] refactor: Have only ReporterOptions which decide what is printed to console during generation. --- src/commands/collectorCommand.ts | 23 ++++++++--------------- src/commands/reporterCommand.ts | 18 +++++++----------- src/reporter/reporter.ts | 25 ++++++++++++------------- src/server/runCollection.ts | 13 +++++-------- 4 files changed, 32 insertions(+), 47 deletions(-) diff --git a/src/commands/collectorCommand.ts b/src/commands/collectorCommand.ts index cfc98e3..5fe194c 100644 --- a/src/commands/collectorCommand.ts +++ b/src/commands/collectorCommand.ts @@ -188,28 +188,21 @@ async function runCollector(args: CollectorCommandArguments): Promise<any> { let reporterArgs: ReporterOptions = { outputPath: args.output, - json: args.json, - yaml: args.yaml, - html: args.html, + printJsonToConsole: args.json, + printYamlToConsole: args.yaml, + printHtmlToConsole: args.html, pdf: args.pdf, usePandoc: args.usePandoc, + extraFiles: [], }; const reporter = new Reporter(reporterArgs, logger); - reporter.saveJsonToFile( - inspectionResult.websocketLog, - "websockets-log.json", - false, - ); + reporter.saveJsonToFile(inspectionResult.websocketLog, "websockets-log.json"); reporter.saveJsonToFile(inspectionResult, "inspection.json"); - reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml", false); - reporter.saveYamlToFile( - inspectionResult.localStorage, - "local-storage.yml", - false, - ); - reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml", false); + reporter.saveYamlToFile(inspectionResult.cookies, "cookies.yml"); + reporter.saveYamlToFile(inspectionResult.localStorage, "local-storage.yml"); + reporter.saveYamlToFile(inspectionResult.beacons, "beacons.yml"); reporter.saveYamlToFile(inspectionResult, "inspection.yml"); const htmlReport = reporter.generateHtmlReport(inspectionResult); await reporter.saveAsOfficeDoc(inspectionResult); diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index dedde67..4a81558 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -88,6 +88,11 @@ async function runReporter(args: ParsedArgsReporter) { officeTemplate: args.officeTemplate, usePandoc: args.usePandoc || false, extraFiles: args.extraFiles, + printHtmlToConsole: !args.outputFile, + printJsonToConsole: false, + pdf: false, + printYamlToConsole: false, + outputPath: ".", }, logger, ); @@ -110,11 +115,7 @@ async function runReporter(args: ParsedArgsReporter) { switch (fileExtension) { case ".pdf": - const htmlContent = reporter.generateHtmlReport( - collectionData, - undefined, - false, - ); + const htmlContent = reporter.generateHtmlReport(collectionData); const pdfBuffer = await reporter.convertHtmlToPdfInMemory(htmlContent); reporter.saveFile(args.outputFile, pdfBuffer); break; @@ -123,12 +124,7 @@ async function runReporter(args: ParsedArgsReporter) { await reporter.saveAsOfficeDoc(collectionData, args.outputFile); break; case ".html": - const html = reporter.generateHtmlReport( - collectionData, - undefined, - false, - ); - reporter.saveFile(args.outputFile, html); + reporter.generateHtmlReport(collectionData, args.outputFile); break; default: logger.warn(`File extension ${fileExtension} is not supported.`); diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index 85c30d7..fcb452d 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -37,21 +37,21 @@ marked.use(markedSmartypants()); export interface ReporterOptions { outputPath?: string; - json?: boolean; - yaml?: boolean; - html?: boolean; - pdf?: boolean; - usePandoc?: boolean; + printJsonToConsole: boolean; + printYamlToConsole: boolean; + printHtmlToConsole: boolean; + pdf: boolean; + usePandoc: boolean; htmlTemplate?: string; officeTemplate?: string; - extraFiles?: any[]; + extraFiles: string[]; } export class Reporter { private options: ReporterOptions; private logger: Logger; - constructor(options: ReporterOptions = {}, logger: Logger) { + constructor(options: ReporterOptions, logger: Logger) { this.options = options; this.logger = logger; } @@ -62,7 +62,6 @@ export class Reporter { generateHtmlReport( inspectionData: object, outputFilename = "inspection.html", - log = true, customTemplate?: string, ) { const templatePath = @@ -74,7 +73,7 @@ export class Reporter { this.saveFile(outputFilename, htmlReport); - if (log && this.options.html) { + if (this.options.printHtmlToConsole) { this.logger.info(htmlReport); } @@ -197,17 +196,17 @@ export class Reporter { } } - saveJsonToFile(data: any, filename: string, log = true) { + saveJsonToFile(data: any, filename: string) { const json_dump = JSON.stringify(data, null, 2); this.saveFile(filename, json_dump); - if (log && this.options.json) { + if (this.options.printJsonToConsole) { this.logger.info(json_dump); } } - saveYamlToFile(data: any, filename: string, log = true) { + saveYamlToFile(data: any, filename: string) { const yaml_dump = yaml.dump(data, { noRefs: true, replacer: function replacer(_, value) { @@ -217,7 +216,7 @@ export class Reporter { this.saveFile(filename, yaml_dump); - if (log && this.options.yaml) { + if (this.options.printYamlToConsole) { this.logger.info(yaml_dump); } } diff --git a/src/server/runCollection.ts b/src/server/runCollection.ts index 7e5a02b..406b359 100644 --- a/src/server/runCollection.ts +++ b/src/server/runCollection.ts @@ -43,20 +43,17 @@ export async function generateHtmlAndPdf( logger: Logger, ) { let reporterArgs: ReporterOptions = { - html: true, + printHtmlToConsole: false, pdf: true, - json: false, + printJsonToConsole: false, outputPath: undefined, usePandoc: false, - yaml: false, + printYamlToConsole: false, + extraFiles: [], }; const reporter = new Reporter(reporterArgs, logger); - let html = reporter.generateHtmlReport( - inspectionOutput, - "inspection.html", - false, - ); + let html = reporter.generateHtmlReport(inspectionOutput, "inspection.html"); let pdfBuffer = await reporter.convertHtmlToPdfInMemory(html); return { html: html, -- GitLab From 9284452dc8021f1035b1edaf2150a6280231e29f Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:45:55 +0100 Subject: [PATCH 7/9] refactor: Remove superfluous call of generateHTML report --- src/commands/reporterCommand.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index 4a81558..a98dae1 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -97,14 +97,6 @@ async function runReporter(args: ParsedArgsReporter) { logger, ); - const htmlReport = reporter.generateHtmlReport(collectionData); - - // Print to console when no output file is specified - if (!args.outputFile) { - logger.info(htmlReport); - return; - } - const fileExtension = path.extname(args.outputFile).toLowerCase(); const outputDir = path.dirname(args.outputFile); -- GitLab From 3436325db86bccfefaf7bcad4de5ae4776701302 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:46:29 +0100 Subject: [PATCH 8/9] refactor: Improve typing of arguments for RunReporter function --- src/commands/reporterCommand.ts | 48 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/commands/reporterCommand.ts b/src/commands/reporterCommand.ts index a98dae1..dbf11cc 100644 --- a/src/commands/reporterCommand.ts +++ b/src/commands/reporterCommand.ts @@ -65,17 +65,37 @@ export default { .nargs("output-file", 1) .alias("output-file", "o") .string("output-file") - .check((argv: ParsedArgsReporter) => { + .check((argv: any) => { if (!argv._[1]) { return "Error: You must provide a file name or path"; } return true; }); }, - handler: async (argv: any) => await runReporter(transformArgsToObject(argv)), + handler: async (argv: any) => { + const runReporterArgs: RunReporterArgs = { + inspectionJsonPath: argv._[1], + htmlTemplate: argv.htmlTemplate, + officeTemplate: argv.officeTemplate, + usePandoc: argv.usePandoc, + extraFiles: argv.extraFile || [], + outputFile: argv.outputFile, + }; + + await runReporter(runReporterArgs); + }, +}; + +type RunReporterArgs = { + htmlTemplate?: string; + officeTemplate?: string; + usePandoc: boolean; + extraFiles: string[]; + outputFile?: string; + inspectionJsonPath: string; }; -async function runReporter(args: ParsedArgsReporter) { +async function runReporter(args: RunReporterArgs) { const logger = create({}); const collectionData = JSON.parse( @@ -123,25 +143,3 @@ async function runReporter(args: ParsedArgsReporter) { break; } } - -function transformArgsToObject(parsingResult: any): ParsedArgsReporter { - return { - _: parsingResult._ as string[], - inspectionJsonPath: parsingResult._[1] as string, - outputFile: parsingResult["output-file"] as string, - htmlTemplate: parsingResult["html-template"] as string | undefined, - officeTemplate: parsingResult["office-template"] as string | undefined, - extraFiles: parsingResult["extra-file"] as any[] | undefined, - usePandoc: parsingResult["use-pandoc"] as boolean | undefined, - }; -} - -interface ParsedArgsReporter { - _: (string | number)[]; - inspectionJsonPath: string; - outputFile?: string; - htmlTemplate?: string; - officeTemplate?: string; - extraFiles?: any[]; - usePandoc?: boolean; -} -- GitLab From 7a7433c792feb7a9ea8f24dc477c19c62f5611e5 Mon Sep 17 00:00:00 2001 From: Felix Woestmann <felix.wostmann@edps.europa.eu> Date: Fri, 21 Mar 2025 17:46:46 +0100 Subject: [PATCH 9/9] refactor: add logging of filenames and change default log level --- src/lib/logger.ts | 5 +++-- src/reporter/reporter.ts | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lib/logger.ts b/src/lib/logger.ts index 4f8096b..d120b19 100644 --- a/src/lib/logger.ts +++ b/src/lib/logger.ts @@ -39,11 +39,12 @@ const create = ( options: CreateLoggerOptions, outputFilePath?: string, defaultMeta?: {}, + defaultLogLevel = "info", ): Logger => { const defaults: CreateLoggerOptions = { console: { silent: false, - level: "debug", + level: defaultLogLevel, stderrLevels: ["error", "debug", "info", "warn"], format: process.stdout.isTTY ? format.combine(format.colorize(), format.simple(), format.metadata()) @@ -51,7 +52,7 @@ const create = ( }, file: { enabled: true, - level: "silly", + level: defaultLogLevel, format: format.combine(format.json(), format.metadata()), }, }; diff --git a/src/reporter/reporter.ts b/src/reporter/reporter.ts index fcb452d..c0e081c 100644 --- a/src/reporter/reporter.ts +++ b/src/reporter/reporter.ts @@ -229,7 +229,9 @@ export class Reporter { saveFile(filename: string, data: any) { if (this.options.outputPath) { - fs.writeFileSync(path.join(this.options.outputPath, filename), data); + let effectivePath = path.join(this.options.outputPath, filename); + this.logger.debug(`Saving file to ${effectivePath}`); + fs.writeFileSync(effectivePath, data); } } } -- GitLab