locator/index.js

#!/usr/bin/env node

const fs = require('fs').promises;
const process = require('process');
const puppeteer = require('puppeteer');

// Dimensions (width and height) of the viewport the web page is rendered in
const size = {
    width: 1280,
    height: 720,
};

// Entry point of the locator.
// Renders the HTML file named by the first command-line argument in a headless
// browser, wraps the text of the first slide in spans, checks with two
// screenshots that this did not change the rendering, and prints the analysis
// of the slide as JSON (as a flat list when the second argument is --flatten).
// Exits with status 1 when the argument is missing, when the file cannot be
// accessed or when the page has no element with id "1"; throws when the two
// screenshots differ.
async function main() {
    // Standard modules needed only by this function
    const { resolve } = require('path');
    const { pathToFileURL } = require('url');

    if (process.argv[2] === undefined) {
        console.error('This program expects an argument.');
        console.error('USAGE: locator <path-to-HTML-file>');
        process.exit(1);
    }

    // Path to the HTML file to analyse. Puppeteer needs an absolute location:
    // resolve() keeps absolute paths and anchors relative ones at the current directory.
    const htmlPath = resolve(process.argv[2]);

    // Check that the file exists. The default mode of access() is the existence
    // check, so no constant has to be looked up on the promises API (`fs` here),
    // which does not expose `constants` on every Node version.
    try {
        await fs.access(htmlPath);
    } catch (e) {
        console.error('No such file: ' + htmlPath);
        process.exit(1);
    }

    // Initialize browser
    const browser = await puppeteer.launch();

    try {
        const page = await browser.newPage();
        await page.setViewport(size);

        // Build a proper file URL so that characters such as '#', '?' or spaces
        // in the path are escaped instead of being interpreted by the browser
        await page.goto(pathToFileURL(htmlPath).href);

        // Only consider the first slide ('#\\31' is the escaped CSS selector for id "1",
        // which is the id of the first slide).
        // We don't take into account the other slides because it would mess up
        // our screenshot verification.
        const root = await page.$('#\\31');

        // Without a first slide there is nothing to analyse
        if (root === null) {
            console.error('Not a marp HTML file.');
            // Set the status and return so that the finally block closes the browser
            process.exitCode = 1;
            return;
        }

        const firstShot = __dirname + '/' + 'screenshot1.png';
        const secondShot = __dirname + '/' + 'screenshot2.png';

        // Take a first screenshot
        await page.screenshot({path: firstShot});

        // Edit the page to shrink elements in order to get better bounding boxes
        await addSpan(root);

        // Take another screenshot to check the modification didn't change the layout of the page
        await page.screenshot({path: secondShot});

        // Compare both screenshots byte for byte; equals() also reports a
        // difference when the two files do not have the same length
        const file1 = await fs.readFile(firstShot);
        const file2 = await fs.readFile(secondShot);

        // Crash if they're different
        if (!file1.equals(file2)) {
            throw new Error("Page edit changed the layout");
        }

        // Analyse the root and output the result
        let analyse = await analyseElement(root);

        if (process.argv[3] === '--flatten') {
            analyse = flatten(analyse);
        }

        console.log(JSON.stringify(analyse, undefined, 4));
    } finally {
        // Close the browser on success as well as on failure
        await browser.close();
    }

}

// Wraps the content of the descendants of the element into a single
// <span class="sized-span">, so that the text can be measured on its own.
// A descendant is left untouched when it has no text or when it contains list
// items (those are wrapped one by one when their own turn comes).
// `element` is a handle providing `$$`; the handles it returns must provide
// `evaluate`. Resolves to nothing: the page is edited in place.
async function addSpan(element) {
    const descendants = await element.$$('*');

    for (const descendant of descendants) {
        // Both checks and the edit are done in a single call into the page
        await descendant.evaluate(el => {
            if (el.textContent === "") {
                return;
            }

            // Query the DOM rather than searching the markup for '<li>', so
            // that list items carrying attributes are detected as well
            if (el.querySelector('li') !== null) {
                return;
            }

            el.innerHTML = '<span class="sized-span">' + el.innerHTML + '</span>';
        });
    }

}

// Recursive function to analyse an HTML element.
// The output is written in hierarchy: an object with the keys
//   tag      - tagName of the element
//   class    - className of the element
//   box      - bounding box divided by the viewport size (see `size`), or null
//              when the element has no bounding box
//   children - analysis of each child element, in the order they are selected
//   text     - text content when the element is a non-empty SPAN, null otherwise
async function analyseElement(element) {
    // Get some information on the element
    const tagAttr = await element.getProperty('tagName');
    const tagName = await tagAttr.jsonValue();

    const classAttr = await element.getProperty('className');
    const className = await classAttr.jsonValue();

    const textContent = await element.evaluate(el => el.textContent);

    // boundingBox() resolves to null for an element that is not rendered,
    // in which case there is nothing to scale
    const box = await element.boundingBox();
    const relativeBox = box === null ? null : {
        x: box.x / size.width,
        y: box.y / size.height,
        width: box.width / size.width,
        height: box.height / size.height,
    };

    // Register it into the return value.
    // The text content is only kept for spans (we made those spans by ourselves in the addSpan function)
    const analyse = {
        tag: tagName,
        class: className,
        box: relativeBox,
        children: [],
        text: tagName === 'SPAN' && textContent !== "" ? textContent : null,
    };

    // Select the children of this HTML element.
    const children = await element.$$('> *');

    for (const child of children) {
        // Recursively analyse the children, one after the other
        analyse.children.push(await analyseElement(child));
    }

    return analyse;
}

// Turns the tree produced by the analysis into a list, appended to `acc`
// (which is also the return value).
// The `children` key is removed from every node that gets visited. A node whose
// only child is one of our "sized-span" spans is not listed itself: the span
// takes its tag and is listed in its place, without being visited any further.
function flatten(input, acc = []) {
    const kids = input.children;
    const firstKid = kids[0];
    delete input.children;

    const wrapsSizedSpan = kids.length === 1
        && firstKid.tag === "SPAN"
        && firstKid.class.includes("sized-span");

    if (wrapsSizedSpan) {
        // The span stands for its parent in the list
        firstKid.tag = input.tag;
        acc.push(firstKid);
        return acc;
    }

    // General case: list the node itself, then everything below it
    acc.push(input);
    for (let i = 0; i < kids.length; i++) {
        flatten(kids[i], acc);
    }

    return acc;
}


// Run the program. A failure is reported on stderr and turned into a non-zero
// exit status instead of being left as an unhandled promise rejection.
main().catch((err) => {
    console.error(err);
    process.exit(1);
});