Locator starts working great
This commit is contained in:
parent
68eeda7aa6
commit
14f5d327b3
22
example.html
22
example.html
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,43 @@
|
||||||
const { PNG } = require('pngjs');
|
const { PNG } = require('pngjs');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
|
|
||||||
// Fixed from https://www.npmjs.com/package/png-quality
|
// Computes a black and white mask of the pixels that differ between two images
// and writes it to `output`.
//
// input1, input2: passed as-is to loadPngFile; both images must have the same
//                 width and height, otherwise an Error is thrown.
// output:         path of the file the mask is written to.
// threshold:      minimal euclidean distance between two pixels, with the red,
//                 green and blue channels scaled to [0, 1], above which the
//                 pixels are considered different.
//
// Pixels that differ become white, the others become black. The mask is built
// in place in the buffer of the first image, and the fourth byte of each pixel
// is left untouched.
//
// The returned promise resolves once the output file is completely written, and
// rejects if packing or writing fails.
async function segmentationMask(input1, input2, output, threshold = 0.02) {
    let img1 = await loadPngFile(input1);
    let img2 = await loadPngFile(input2);

    if (img1.width !== img2.width || img1.height !== img2.height) {
        throw new Error("Cannot compute mask on images with different sizes");
    }

    // Pixels are read 4 bytes at a time, only the 3 first bytes are compared
    for (let i = 0; i < img1.data.length; i += 4) {
        let dr = (img1.data[i + 0] - img2.data[i + 0]) / 255;
        let dg = (img1.data[i + 1] - img2.data[i + 1]) / 255;
        let db = (img1.data[i + 2] - img2.data[i + 2]) / 255;

        // Test difference
        let difference = Math.sqrt(dr * dr + dg * dg + db * db);
        let value = difference > threshold ? 255 : 0;

        img1.data[i + 0] = value;
        img1.data[i + 1] = value;
        img1.data[i + 2] = value;
    }

    // pipe() returns the destination stream, which is not a promise: awaiting it
    // would resolve immediately, so we wait for the 'finish' event instead.
    await new Promise((resolve, reject) => {
        let outputStream = fs.createWriteStream(output);
        outputStream.on('finish', resolve);
        outputStream.on('error', reject);

        let packed = img1.pack();
        packed.on('error', reject);
        packed.pipe(outputStream);
    });
}
|
||||||
|
|
||||||
|
// The following is fixed from https://www.npmjs.com/package/png-quality
|
||||||
|
|
||||||
async function loadPngFile(pathOrBuffer) {
|
async function loadPngFile(pathOrBuffer) {
|
||||||
// Load buffer of path
|
// Load buffer of path
|
||||||
|
@ -62,4 +98,5 @@ module.exports = {
|
||||||
loadPngFile,
|
loadPngFile,
|
||||||
mse,
|
mse,
|
||||||
psnr,
|
psnr,
|
||||||
|
segmentationMask,
|
||||||
};
|
};
|
226
index.js
226
index.js
|
@ -3,7 +3,7 @@
|
||||||
const fs = require('fs').promises;
|
const fs = require('fs').promises;
|
||||||
const process = require('process');
|
const process = require('process');
|
||||||
const puppeteer = require('puppeteer');
|
const puppeteer = require('puppeteer');
|
||||||
const quality = require('./quality.js');
|
const image = require('./image.js');
|
||||||
const uuid = require('uuid').v4;
|
const uuid = require('uuid').v4;
|
||||||
|
|
||||||
// Size of the rendering of the web page
|
// Size of the rendering of the web page
|
||||||
|
@ -34,36 +34,181 @@ async function eprint(data) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Writes data followed by a line break on the standard output.
// When data is undefined or null (e.g. println() without argument), only the
// line break is written. Other falsy values such as 0 or false are printed as
// they are.
// The writing is delegated to the write helper, defined elsewhere in this file.
async function println(data) {
    let text = (data === undefined || data === null) ? "" : data;
    await write(process.stdout, text + '\n');
}
|
||||||
|
|
||||||
// Writes data followed by a line break on the standard error.
// When data is undefined or null (e.g. eprintln() without argument), only the
// line break is written. Other falsy values such as 0 or false are printed as
// they are.
// The writing is delegated to the write helper, defined elsewhere in this file.
async function eprintln(data) {
    let text = (data === undefined || data === null) ? "" : data;
    await write(process.stderr, text + '\n');
}
|
||||||
|
|
||||||
|
// Prints an information message on the standard error.
// The message is prefixed by "info" in bold blue followed by a bold colon.
async function info(data) {
    const prefix = "\x1b[34;1minfo\x1b[0m\x1b[1m:\x1b[0m ";
    await eprintln(prefix + data);
}
|
||||||
|
|
||||||
|
// Prints a warning message on the standard error.
// The message is prefixed by "warning" in bold yellow followed by a bold colon.
async function warning(data) {
    const prefix = "\x1b[33;1mwarning\x1b[0m\x1b[1m:\x1b[0m ";
    await eprintln(prefix + data);
}
|
||||||
|
|
||||||
|
// Prints an error message on the standard error.
// The message is prefixed by "error" in bold red followed by a bold colon.
// This function only prints: it is up to the caller to exit if needed.
async function error(data) {
    const prefix = "\x1b[31;1merror\x1b[0m\x1b[1m:\x1b[0m ";
    await eprintln(prefix + data);
}
|
||||||
|
|
||||||
|
// Prints the help of the program on the standard output.
//
// Every option parsed by main is listed, including -t / --threshold.
// The descriptions are aligned on a column computed from the widest option,
// measured without the escape codes since they take no room on screen.
//
// Returns the promise of the underlying println call, so that a caller can
// await it before exiting the process. Callers that ignore the returned value
// behave as before.
function help() {
    // ANSI escape codes: 32 is green, 33 is yellow, 0 resets the style
    const green = text => "\x1b[32m" + text + "\x1b[0m";
    const yellow = text => "\x1b[33m" + text + "\x1b[0m";

    const name = green("locator");
    const version = "0.1.0";
    const description = "Helper tool to analyse HTML content produced from marp slides";
    const command = "locator";

    // Each option is [short flag or null, long flag, value placeholder, description]
    const options = [
        ["-h", "--help", "", "Displays this help and quit"],
        ["-i", "--input", "<INPUT>", "Path to the HTML input file"],
        ["-o", "--output", "<OUTPUT>", "Save mask images and annotations as json in this directory"],
        ["-t", "--threshold", "<THRESHOLD>", "Minimal color difference for a pixel to be part of a mask (default: 0.02)"],
        ["-s", "--shrink", "", "Replace leafs of tree by span of themselves to shrink horizontally their bounding boxes"],
        [null, "--flatten", "", "Flattens the tree into a list before serializing in JSON"],
        ["-f", "--force", "", "Delete the output directory before generating it again"],
    ];

    // Builds the text of the flags, optionally colored.
    // Options without short flag are indented so that long flags stay aligned.
    const flags = ([short, long, value], color) =>
        (short === null ? "    " : color(short) + ", ") + color(long) + (value === "" ? "" : " " + value);
    const plain = text => text;

    const width = Math.max(...options.map(option => flags(option, plain).length));

    const lines = options.map(option => {
        const padding = " ".repeat(width - flags(option, plain).length + 3);
        return "    " + flags(option, green) + padding + option[3];
    });

    return println(`${name} ${version}
${description}

${yellow("USAGE:")}
    ${command} -i <HTML-FILE>

${yellow("ARGUMENTS:")}
${lines.join("\n")}`);
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
|
|
||||||
let outputDir = null;
|
let outputDir = null;
|
||||||
let filename = process.argv[2];
|
let filename = null;
|
||||||
|
let threshold = undefined;
|
||||||
|
let forceMode = false;
|
||||||
|
let shrinkBoxes = true;
|
||||||
|
let flatten = false;
|
||||||
|
|
||||||
if (process.argv[2] === "-o" || process.argv[2] === "--output") {
|
let argIndex = 2;
|
||||||
outputDir = process.argv[3];
|
|
||||||
filename = process.argv[4];
|
while (argIndex < process.argv.length) {
|
||||||
|
|
||||||
|
switch (process.argv[argIndex]) {
|
||||||
|
case "-i":
|
||||||
|
case "--input":
|
||||||
|
filename = process.argv[argIndex + 1];
|
||||||
|
argIndex += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "-o":
|
||||||
|
case "--output":
|
||||||
|
outputDir = process.argv[argIndex + 1];
|
||||||
|
argIndex += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "-f":
|
||||||
|
case "--force":
|
||||||
|
forceMode = true;
|
||||||
|
argIndex++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "-t":
|
||||||
|
case "--threshold":
|
||||||
|
threshold = parseFloat(process.argv[argIndex + 1]);
|
||||||
|
argIndex += 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "-s":
|
||||||
|
case "--shrink":
|
||||||
|
shrinkBoxes = true;
|
||||||
|
argIndex++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "--flatten":
|
||||||
|
flatten = true;
|
||||||
|
argIndex++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case "-h":
|
||||||
|
case "--help":
|
||||||
|
help();
|
||||||
|
process.exit(0);
|
||||||
|
|
||||||
|
default:
|
||||||
|
error("unknown option " + process.argv[argIndex]);
|
||||||
|
help();
|
||||||
|
process.exit(1);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filename === null) {
|
||||||
|
error("program needs a filename argument");
|
||||||
|
help();
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (threshold !== undefined && isNaN(threshold)) {
|
||||||
|
error(tmp + " is not a valid threshold value");
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (outputDir !== null) {
|
||||||
|
|
||||||
|
if (forceMode === false) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await fs.mkdir(outputDir);
|
await fs.mkdir(outputDir);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
eprintln("Couldn't create directory " + outputDir + ": " + e);
|
error("couldn't create directory " + outputDir + ": " + e);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (filename === undefined) {
|
} else {
|
||||||
eprintln('This program expects an argument.');
|
|
||||||
eprintln('USAGE: locator <path-to-HTML-file>');
|
try {
|
||||||
|
// If we can just create the directory, we don't need to do anything more
|
||||||
|
await fs.mkdir(outputDir);
|
||||||
|
|
||||||
|
} catch (e) {
|
||||||
|
|
||||||
|
// If it fails, we must try to delete it to recreate it because we're in force mode
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Try to access .locator file
|
||||||
|
// If it exists, it is likely that we generated the directory, and therefore, it is safe to delete
|
||||||
|
await fs.access(outputDir + "/.locator", fs.constants.F_OK);
|
||||||
|
} catch (e) {
|
||||||
|
// If the file doesn't exist, we don't really know what we would be deleting so we should avoid it
|
||||||
|
error(outputDir + " doesn't seem to have been generated by locator, not deleting and quitting");
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await fs.rm(outputDir, { recursive: true, force: true });
|
||||||
|
await fs.mkdir(outputDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
let lock = await fs.open(outputDir + "/.locator", 'a');
|
||||||
|
await lock.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Path to the HTML file to analyse (given as relative path from current directory)
|
// Path to the HTML file to analyse (given as relative path from current directory)
|
||||||
// We need the full path so that puppeteer is able to access it
|
// We need the full path so that puppeteer is able to access it
|
||||||
const path = filename.startsWith('/') ? filename : process.cwd() + '/' + filename;
|
const path = filename.startsWith('/') ? filename : process.cwd() + '/' + filename;
|
||||||
|
@ -72,7 +217,7 @@ async function main() {
|
||||||
try {
|
try {
|
||||||
await fs.access(path, fs.constants.F_OK);
|
await fs.access(path, fs.constants.F_OK);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
eprintln('No such file: ' + path);
|
error('no such file: ' + path);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,7 +233,7 @@ async function main() {
|
||||||
|
|
||||||
// If there is no slide, try to run on HTML body
|
// If there is no slide, try to run on HTML body
|
||||||
if (root === null) {
|
if (root === null) {
|
||||||
eprintln('Not a marp HTML file.');
|
error('not a marp HTML file');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,7 +248,12 @@ async function main() {
|
||||||
await page.screenshot({path: (outputDir === null ? __dirname : outputDir) + '/' + 'screenshot1.png'});
|
await page.screenshot({path: (outputDir === null ? __dirname : outputDir) + '/' + 'screenshot1.png'});
|
||||||
|
|
||||||
// Edit the page to shrink elements in order to get better bounding boxes
|
// Edit the page to shrink elements in order to get better bounding boxes
|
||||||
let withSpan = await addSpan(root);
|
if (shrinkBoxes) {
|
||||||
|
info("shrinking bounding boxes");
|
||||||
|
await addSpan(root);
|
||||||
|
info("boundingboxes shrunk");
|
||||||
|
eprintln();
|
||||||
|
}
|
||||||
|
|
||||||
// Take another screenshot and check the modification we made didn't change the layout of the page
|
// Take another screenshot and check the modification we made didn't change the layout of the page
|
||||||
await page.screenshot({path: (outputDir === null ? __dirname : outputDir) + '/' + 'screenshot2.png'});
|
await page.screenshot({path: (outputDir === null ? __dirname : outputDir) + '/' + 'screenshot2.png'});
|
||||||
|
@ -115,27 +265,29 @@ async function main() {
|
||||||
|
|
||||||
if (!filesAreSame) {
|
if (!filesAreSame) {
|
||||||
// Check psnr
|
// Check psnr
|
||||||
let psnr = await quality.psnr(__dirname + '/' + 'screenshot1.png', __dirname + '/' + 'screenshot2.png');
|
let psnr = await image.psnr(__dirname + '/' + 'screenshot1.png', __dirname + '/' + 'screenshot2.png');
|
||||||
|
|
||||||
// Crash if they're different
|
// Crash if they're different
|
||||||
if (psnr > 70) {
|
if (psnr > 70) {
|
||||||
eprintln("\x1b[33mWarning: " + filename + " produced slight diff: psnr = " + psnr + '\x1b[0m');
|
warning(filename + " produced slight diff: psnr = " + psnr);
|
||||||
} else {
|
} else {
|
||||||
await eprintln("\x1b[31mError: age edit changed the layout: psnr = " + psnr + '\x1b[0m');
|
await error("page edit changed the layout: psnr = " + psnr);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Analyse the root and output the result
|
// Analyse the root and output the result
|
||||||
let analyse = await analyseElement(root, page, outputDir);
|
info("performing analysis");
|
||||||
|
let analyse = await analyseElement(root, page, outputDir, threshold);
|
||||||
|
info("analysis done");
|
||||||
|
|
||||||
if (process.argv[3] === '--flatten') {
|
if (flatten) {
|
||||||
analyse = flatten(analyse);
|
analyse = flattenTree(analyse);
|
||||||
}
|
}
|
||||||
|
|
||||||
let json = JSON.stringify(analyse, undefined, 4);
|
let json = JSON.stringify(analyse, undefined, 4);
|
||||||
if (outputDir === null) {
|
if (outputDir === null) {
|
||||||
console.log();
|
console.log(json);
|
||||||
} else {
|
} else {
|
||||||
await fs.writeFile(outputDir + '/annotations.json', json);
|
await fs.writeFile(outputDir + '/annotations.json', json);
|
||||||
}
|
}
|
||||||
|
@ -162,7 +314,7 @@ async function addSpan(element) {
|
||||||
});
|
});
|
||||||
|
|
||||||
if (html !== null) {
|
if (html !== null) {
|
||||||
await eprintln("\x1b[36mReplaced " + JSON.stringify(html[0]) + " by " + JSON.stringify(html[1]) + '\x1b[0m');
|
await info("replaced \x1b[34m" + html[0].replace(/(\r\n|\n|\r)/gm, "") + "\x1b[0m by \x1b[34m" + html[1].replace(/(\r\n|\n|\r)/gm, "") + '\x1b[0m');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -171,7 +323,7 @@ async function addSpan(element) {
|
||||||
|
|
||||||
// Recursive function to analyse an HTML element.
|
// Recursive function to analyse an HTML element.
|
||||||
// The output is written in hierarchy.
|
// The output is written in hierarchy.
|
||||||
async function analyseElement(element, page, outputDir = null) {
|
async function analyseElement(element, page, outputDir = null, threshold = undefined) {
|
||||||
// Get some information on the element
|
// Get some information on the element
|
||||||
let tagAttr = await element.getProperty('tagName');
|
let tagAttr = await element.getProperty('tagName');
|
||||||
let tagName = await tagAttr.jsonValue();
|
let tagName = await tagAttr.jsonValue();
|
||||||
|
@ -187,15 +339,23 @@ async function analyseElement(element, page, outputDir = null) {
|
||||||
let analyse = {};
|
let analyse = {};
|
||||||
analyse.tag = tagName;
|
analyse.tag = tagName;
|
||||||
analyse.class = className;
|
analyse.class = className;
|
||||||
analyse.uuid = uuid();
|
|
||||||
analyse.box = box;
|
analyse.box = box;
|
||||||
box.x /= size.width;
|
box.x /= size.width;
|
||||||
box.width /= size.width;
|
box.width /= size.width;
|
||||||
box.y /= size.height;
|
box.y /= size.height;
|
||||||
box.height /= size.height;
|
box.height /= size.height;
|
||||||
|
|
||||||
|
if (outputDir !== null) {
|
||||||
|
analyse.uuid = uuid();
|
||||||
|
}
|
||||||
|
|
||||||
analyse.children = [];
|
analyse.children = [];
|
||||||
|
|
||||||
if (outputDir !== null) {
|
if (outputDir !== null) {
|
||||||
|
|
||||||
|
info("computing screenshots \x1b[34m" + analyse.uuid + "\x1b[0m");
|
||||||
|
|
||||||
// Perform a screenshot where the element is hidden
|
// Perform a screenshot where the element is hidden
|
||||||
let previousVisibility = await element.evaluate(el => {
|
let previousVisibility = await element.evaluate(el => {
|
||||||
let previousVisibility = el.style.visibility;
|
let previousVisibility = el.style.visibility;
|
||||||
|
@ -205,6 +365,14 @@ async function analyseElement(element, page, outputDir = null) {
|
||||||
|
|
||||||
await page.screenshot({path: outputDir + '/' + analyse.uuid + '.png'});
|
await page.screenshot({path: outputDir + '/' + analyse.uuid + '.png'});
|
||||||
|
|
||||||
|
// Compute mask
|
||||||
|
await image.segmentationMask(
|
||||||
|
outputDir + '/screenshot2.png',
|
||||||
|
outputDir + '/' + analyse.uuid + '.png',
|
||||||
|
outputDir + '/' + analyse.uuid + '-mask.png',
|
||||||
|
threshold,
|
||||||
|
);
|
||||||
|
|
||||||
await element.evaluate((el, previousVisibility) => {
|
await element.evaluate((el, previousVisibility) => {
|
||||||
el.style.visibility = previousVisibility;
|
el.style.visibility = previousVisibility;
|
||||||
}, [ previousVisibility ]);
|
}, [ previousVisibility ]);
|
||||||
|
@ -213,8 +381,6 @@ async function analyseElement(element, page, outputDir = null) {
|
||||||
// Extract the text content if it is a span (we made those spans by ourselves in the addSpan function)
|
// Extract the text content if it is a span (we made those spans by ourselves in the addSpan function)
|
||||||
if (tagName === 'SPAN' && textContent !== "") {
|
if (tagName === 'SPAN' && textContent !== "") {
|
||||||
analyse.text = textContent;
|
analyse.text = textContent;
|
||||||
} else {
|
|
||||||
analyse.text = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select the children of this HTML element.
|
// Select the children of this HTML element.
|
||||||
|
@ -222,14 +388,14 @@ async function analyseElement(element, page, outputDir = null) {
|
||||||
|
|
||||||
for (let child of children) {
|
for (let child of children) {
|
||||||
// Recursively analyse the children
|
// Recursively analyse the children
|
||||||
analyse.children.push(await analyseElement(child, page, outputDir));
|
analyse.children.push(await analyseElement(child, page, outputDir, threshold));
|
||||||
}
|
}
|
||||||
|
|
||||||
return analyse;
|
return analyse;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flattens the tree into a list.
|
// Flattens the tree into a list.
|
||||||
function flatten(input, acc = []) {
|
function flattenTree(input, acc = []) {
|
||||||
let children = input.children;
|
let children = input.children;
|
||||||
let child = children[0];
|
let child = children[0];
|
||||||
delete input["children"];
|
delete input["children"];
|
||||||
|
@ -241,7 +407,7 @@ function flatten(input, acc = []) {
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
if (child.tag === "SPAN" && child.class.indexOf("sized-span") !== -1) {
|
if (child.tag === "SPAN" && child.class.indexOf("sized-span") !== -1) {
|
||||||
child.tag = input.tag;
|
delete child["children"];
|
||||||
acc.push(child);
|
acc.push(child);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -250,7 +416,7 @@ function flatten(input, acc = []) {
|
||||||
default:
|
default:
|
||||||
acc.push(input);
|
acc.push(input);
|
||||||
for (let child of children) {
|
for (let child of children) {
|
||||||
flatten(child, acc);
|
flattenTree(child, acc);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"name": "locator",
|
"name": "locator",
|
||||||
"version": "1.0.0",
|
"version": "0.1.0",
|
||||||
"description": "",
|
"description": "Helper tool to analyse HTML content produced from marp slides",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"bin": "index.js",
|
"bin": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|
Loading…
Reference in New Issue