locator/index.js

87 lines
2.3 KiB
JavaScript

#!/usr/bin/env node
const fs = require('fs').promises;
const process = require('process');
const puppeteer = require('puppeteer');
/**
 * Entry point of the locator tool.
 *
 * Renders the HTML file named by the first command-line argument in a
 * headless browser with a fixed 1280x720 viewport, collects the bounding
 * boxes of headings, links, images, text paragraphs, lists and list items,
 * and prints them to stdout as pretty-printed JSON.
 *
 * Each entry has the shape `{ x, y, width, height, type }`, where the
 * geometry is expressed as a fraction of the viewport size and `type` is the
 * selector that matched the element. Entries are sorted top to bottom, then
 * left to right.
 *
 * Exits the process with status 1 when the argument is missing or the file
 * is not accessible.
 *
 * @returns {Promise<void>} Resolves once the report is printed and the
 *     browser is closed; rejects if the browser fails to launch or render.
 */
async function main() {
    // Required locally so this function does not depend on additional
    // file-level bindings (the name `path` is already used as a local below).
    const { resolve } = require('path');
    const { pathToFileURL } = require('url');

    const target = process.argv[2];
    if (target === undefined) {
        console.error("This program expects an argument.");
        console.error("USAGE: locator <path-to-HTML-file>");
        process.exit(1);
    }

    // Path to the HTML file to analyse. The browser needs an absolute
    // location, so relative paths are resolved against the current directory.
    const path = resolve(target);

    // Check that the file exists. The mode argument is omitted on purpose:
    // the default already is an existence check, and `constants` is not
    // exposed on the promise-based `fs` API in older Node.js releases, where
    // reading it would throw and every file would be reported as missing.
    try {
        await fs.access(path);
    } catch (e) {
        console.error("No such file: " + path);
        process.exit(1);
    }

    // Size of the rendering of the web page
    const size = { width: 1280, height: 720 };
    const selectors = ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"];

    // This will contain all the collected information
    const info = [];

    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.setViewport(size);
        // Build a properly encoded file URL so that paths containing spaces,
        // '#', '?' or '%' still point at the intended file.
        await page.goto(pathToFileURL(path).href);

        for (const selector of selectors) {
            // Skip paragraphs that have child elements (which means that
            // they are not plain text paragraphs).
            const query = selector === "p" ? "p:not(:has(*))" : selector;

            const elements = await page.$$(query);
            for (const element of elements) {
                const box = await element.boundingBox();
                if (box === null) {
                    // Elements that are not rendered have no bounding box.
                    continue;
                }
                info.push({
                    ...box,
                    // Scale the bounding box relative to the viewport
                    x: box.x / size.width,
                    y: box.y / size.height,
                    width: box.width / size.width,
                    height: box.height / size.height,
                    // Give the selector as type
                    type: selector,
                });
            }
        }
    } finally {
        // Always shut the browser down, even when rendering or querying
        // failed, so that no stray browser process is left behind.
        await browser.close();
    }

    // Sort the info by y and then x (top to bottom, then left to right)
    info.sort((a, b) => a.y - b.y || a.x - b.x);

    // Pretty print the output info
    console.log(JSON.stringify(info, undefined, 4));
}
// Run the tool. Any failure that escapes main() is reported on stderr and
// turned into a non-zero exit status instead of an unhandled promise rejection.
main().catch((error) => {
    console.error(error);
    process.exit(1);
});