From 58afd8ac3e5b4e2a1f8167fd80399ce75884b51e Mon Sep 17 00:00:00 2001 From: Thomas Forgione Date: Sun, 19 Feb 2023 10:42:30 +0100 Subject: [PATCH] Merge --- index.js | 149 ++++++++++++++++++++++++------------------------------ index2.js | 89 -------------------------------- 2 files changed, 67 insertions(+), 171 deletions(-) delete mode 100644 index2.js diff --git a/index.js b/index.js index 21b7368..6ffa309 100644 --- a/index.js +++ b/index.js @@ -4,11 +4,14 @@ const fs = require('fs').promises; const process = require('process'); const puppeteer = require('puppeteer'); +// Size of the rendering of the web page +const size = { width: 1280, height: 720 }; + async function main() { if (process.argv[2] === undefined) { - console.error("This program expects an argument."); - console.error("USAGE: locator "); + console.error('This program expects an argument.'); + console.error('USAGE: locator '); process.exit(1); } @@ -20,44 +23,25 @@ async function main() { try { await fs.access(path, fs.constants.F_OK); } catch (e) { - console.error("No such file: " + path); + console.error('No such file: ' + path); process.exit(1); } - // Size of the rendering of the web page - const size = { width: 1280, height: 720 }; - // Initialize browser const browser = await puppeteer.launch(); const page = await browser.newPage(); await page.setViewport(size); - await page.goto("file://" + path); + await page.goto('file://' + path); - // This will contain all the collected information - let info = []; + // Only consider the first slide (#\\331 === #1, which is the id of the first slide) + // We don't take into account the other slides because it will mess up with our screenshot varification + let root = await page.$('#\\31'); // Take a first screenshot await page.screenshot({path: __dirname + '/' + 'screenshot1.png'}); // Edit the page to shrink elements in order to get better bounding boxes - for (let selector of ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"]) { - - let query = selector; - - let shouldCreateSpan = query !== "ul" && query !== "ol" && query != "img"; - - if (shouldCreateSpan) { - - await page.evaluate(([query]) => { - for (let e of document.querySelectorAll(query)) { - if (e.children.length === 0) { - e.innerHTML = '' + e.innerHTML + ''; - } - } - - }, [query]); - } - } + let withSpan = await addSpan(root); // Take another screenshot and check the modification we made didn't change the layout of the page await page.screenshot({path: __dirname + '/' + 'screenshot2.png'}); @@ -72,63 +56,64 @@ async function main() { throw new Error("Page edit changed the layout"); } - for (let selector of ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"]) { - - let query = selector; - - // Shrink the elements horizontally (to be able to get better bounding boxes) - let shouldCreateSpan = query !== "ul" && query !== "ol" && query != "img"; - - // Query the considered element - let parents = await page.$$(query); - let elements = await page.$$(query + (shouldCreateSpan ? ' > *:first-child' : '')); - - for (let index = 0; index < elements.length; index ++) { - - let parent = parents[index]; - let element = elements[index]; - - let box = await element.boundingBox(); - - let classElement = shouldCreateSpan ? parent : element; - let classNameAttr = await classElement.getProperty('className'); - let className = await classNameAttr.jsonValue(); - - // Scale the bounding box - box.x /= size.width; - box.width /= size.width; - box.y /= size.height; - box.height /= size.height; - - // Give the selector as type - box.type = selector; - - if (className !== "") { - box.class = className; - } - - info.push(box); - } - - } - - // Sort the info by y and the x (top to bottom, then left to right) - info.sort((a, b) => { - if (a.y < b.y || (a.y == b.y && a.x < b.x)) { - return -1; - } - - if (a.y > b.y || (a.y == b.y && a.x > b.x)) { - return 1; - } - - return 0; - }); - - // Pretty print the output info - console.log(JSON.stringify(info, undefined, 4)); + // Analyse the root and output the result + let analyse = await analyseElement(root); + console.log(JSON.stringify(analyse, undefined, 4)); await browser.close(); + } +// Traverses the text nodes of the element and put every text into a single span. +async function addSpan(element) { + let elts = await element.$$('*'); + + for (let elt of elts) { + let value = await elt.evaluate(el => el.textContent, element); + if (value !== "") { + await elt.evaluate(el => el.innerHTML = '' + el.innerHTML + ''); + } + } +} + +// Recursive function to analyse an HTML element. +// The output is written in hierarchy. +async function analyseElement(element) { + // Get some information on the element + let tagAttr = await element.getProperty('tagName'); + let tagName = await tagAttr.jsonValue(); + + let classAttr = await element.getProperty('className'); + let className = await classAttr.jsonValue(); + + let textContent = await element.evaluate(el => el.textContent, element); + + let box = await element.boundingBox(); + + // Register it into the return value + let analyse = {}; + analyse.tag = tagName; + analyse.class = className; + analyse.box = box; + analyse.children = []; + + // Extract the text content if it is a span (we made those spans by ourselves in the addSpan function) + if (tagName === 'SPAN' && textContent !== "") { + analyse.text = textContent; + } + + + // Select the children of this HTML element. + let children = await element.$$('> *'); + + for (let child of children) { + // Recursively analyse the children + analyse.children.push(await analyseElement(child)); + } + + return analyse; +} + + + main(); diff --git a/index2.js b/index2.js deleted file mode 100644 index 0c734d3..0000000 --- a/index2.js +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env node - -const fs = require('fs').promises; -const process = require('process'); -const puppeteer = require('puppeteer'); - -// Size of the rendering of the web page -const size = { width: 1280, height: 720 }; - -async function main() { - - if (process.argv[2] === undefined) { - console.error("This program expects an argument."); - console.error("USAGE: locator "); - process.exit(1); - } - - // Path to the HTML file to analyse (given as relative path from current directory) - // We need the full path so that puppeteer is able to access it - const path = process.argv[2].startsWith('/') ? process.argv[2] : process.cwd() + '/' + process.argv[2]; - - // Check that the file exists - try { - await fs.access(path, fs.constants.F_OK); - } catch (e) { - console.error("No such file: " + path); - process.exit(1); - } - - // Initialize browser - const browser = await puppeteer.launch(); - const page = await browser.newPage(); - await page.setViewport(size); - await page.goto("file://" + path); - - let currentSlide = 1; - let hierarchy = []; - - while (true) { - let root = await page.$("#\\3" + currentSlide); - - if (root === null) { - break; - } - - let currentInfo = {}; - hierarchy.push(currentInfo); - await analyseElement(root, currentInfo); - - currentSlide++; - } - - console.log(JSON.stringify(hierarchy, undefined, 4)); - - await browser.close(); - -} - - -async function analyseElement(element, hierarchy, tabs = '', stop = false) { - let tagAttr = await element.getProperty("tagName"); - let tagName = await tagAttr.jsonValue(); - - let classAttr = await element.getProperty("className"); - let className = await classAttr.jsonValue(); - - let box = await element.boundingBox(); - hierarchy.tag = tagName; - hierarchy.class = className; - hierarchy.x = box.x / size.width; - hierarchy.width = box.width / size.width; - hierarchy.y = box.y / size.height; - hierarchy.height = box.height / size.height; - hierarchy.children = []; - - console.log(tabs + tagName + ' "' + className + '" ' + JSON.stringify(box)); - - let children = await element.$$('> *'); - - for (let child of children) { - let currentInfo = {}; - hierarchy.children.push(currentInfo); - await analyseElement(child, currentInfo, tabs + ' ', true); - } -} - - - -main();