diff --git a/index.js b/index.js index 16c629b..8bdfcad 100644 --- a/index.js +++ b/index.js @@ -36,18 +36,54 @@ async function main() { // This will contain all the collected information let info = []; + // Take a first screenshot + await page.screenshot({path: __dirname + '/' + 'screenshot1.png'}); + + // Edit the page to shrink elements in order to get better bounding boxes for (let selector of ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"]) { let query = selector; - if (query === "p") { - // Skip paragraphs that have children (which means that they are not text paragraphs) - query += ":not(:has(*))"; + + let shouldCreateSpan = query !== "ul" && query !== "ol" && query != "img"; + + if (shouldCreateSpan) { + + await page.evaluate(([query]) => { + for (let e of document.querySelectorAll(query)) { + if (e.children.length === 0) { + e.innerHTML = '' + e.innerHTML + ''; + } + } + + }, [query]); } + } + + // Take another screenshot and check the modification we made didn't change the layout of the page + await page.screenshot({path: __dirname + '/' + 'screenshot2.png'}); + + // Compare both screenshots + let file1 = await fs.readFile(__dirname + '/' + 'screenshot1.png'); + let file2 = await fs.readFile(__dirname + '/' + 'screenshot2.png'); + let filesAreSame = file1.map((x, i) => x === file2[i]).reduce((a, b) => a && b, true); + + // Crash if they're different + if (!filesAreSame) { + throw new Error("Page edit changed the layout"); + } + + for (let selector of ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"]) { + + let query = selector; + + // Shrink the elements horizontally (to be able to get better bounding boxes) + let shouldCreateSpan = query !== "ul" && query !== "ol" && query != "img"; // Query the considered element - let elements = await page.$$(query); + let elements = await page.$$(query + (shouldCreateSpan ? ' > *:first-child' : '')); for (let element of elements) { + let box = await element.boundingBox(); // Scale the bounding box