Merge
This commit is contained in:
parent
e5cca75737
commit
58afd8ac3e
127
index.js
127
index.js
|
@ -4,11 +4,14 @@ const fs = require('fs').promises;
|
||||||
const process = require('process');
|
const process = require('process');
|
||||||
const puppeteer = require('puppeteer');
|
const puppeteer = require('puppeteer');
|
||||||
|
|
||||||
|
// Size of the rendering of the web page
|
||||||
|
const size = { width: 1280, height: 720 };
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
|
|
||||||
if (process.argv[2] === undefined) {
|
if (process.argv[2] === undefined) {
|
||||||
console.error("This program expects an argument.");
|
console.error('This program expects an argument.');
|
||||||
console.error("USAGE: locator <path-to-HTML-file>");
|
console.error('USAGE: locator <path-to-HTML-file>');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,44 +23,25 @@ async function main() {
|
||||||
try {
|
try {
|
||||||
await fs.access(path, fs.constants.F_OK);
|
await fs.access(path, fs.constants.F_OK);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("No such file: " + path);
|
console.error('No such file: ' + path);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size of the rendering of the web page
|
|
||||||
const size = { width: 1280, height: 720 };
|
|
||||||
|
|
||||||
// Initialize browser
|
// Initialize browser
|
||||||
const browser = await puppeteer.launch();
|
const browser = await puppeteer.launch();
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
await page.setViewport(size);
|
await page.setViewport(size);
|
||||||
await page.goto("file://" + path);
|
await page.goto('file://' + path);
|
||||||
|
|
||||||
// This will contain all the collected information
|
// Only consider the first slide ('#\\31' is the CSS-escaped form of '#1', which is the id of the first slide)
|
||||||
let info = [];
|
// We ignore the other slides because they would interfere with our screenshot verification
|
||||||
|
let root = await page.$('#\\31');
|
||||||
|
|
||||||
// Take a first screenshot
|
// Take a first screenshot
|
||||||
await page.screenshot({path: __dirname + '/' + 'screenshot1.png'});
|
await page.screenshot({path: __dirname + '/' + 'screenshot1.png'});
|
||||||
|
|
||||||
// Edit the page to shrink elements in order to get better bounding boxes
|
// Edit the page to shrink elements in order to get better bounding boxes
|
||||||
for (let selector of ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"]) {
|
let withSpan = await addSpan(root);
|
||||||
|
|
||||||
let query = selector;
|
|
||||||
|
|
||||||
let shouldCreateSpan = query !== "ul" && query !== "ol" && query != "img";
|
|
||||||
|
|
||||||
if (shouldCreateSpan) {
|
|
||||||
|
|
||||||
await page.evaluate(([query]) => {
|
|
||||||
for (let e of document.querySelectorAll(query)) {
|
|
||||||
if (e.children.length === 0) {
|
|
||||||
e.innerHTML = '<span>' + e.innerHTML + '</span>';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}, [query]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Take another screenshot and check the modification we made didn't change the layout of the page
|
// Take another screenshot and check the modification we made didn't change the layout of the page
|
||||||
await page.screenshot({path: __dirname + '/' + 'screenshot2.png'});
|
await page.screenshot({path: __dirname + '/' + 'screenshot2.png'});
|
||||||
|
@ -72,63 +56,64 @@ async function main() {
|
||||||
throw new Error("Page edit changed the layout");
|
throw new Error("Page edit changed the layout");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let selector of ["h1", "h2", "h3", "h4", "h5", "h6", "a", "img", "p", "ul", "ol", "li"]) {
|
// Analyse the root and output the result
|
||||||
|
let analyse = await analyseElement(root);
|
||||||
|
console.log(JSON.stringify(analyse, undefined, 4));
|
||||||
|
|
||||||
let query = selector;
|
await browser.close();
|
||||||
|
|
||||||
// Shrink the elements horizontally (to be able to get better bounding boxes)
|
}
|
||||||
let shouldCreateSpan = query !== "ul" && query !== "ol" && query != "img";
|
|
||||||
|
|
||||||
// Query the considered element
|
// Traverses the descendants of the element and wraps the content of every one that has text in a span.
|
||||||
let parents = await page.$$(query);
|
async function addSpan(element) {
|
||||||
let elements = await page.$$(query + (shouldCreateSpan ? ' > *:first-child' : ''));
|
let elts = await element.$$('*');
|
||||||
|
|
||||||
for (let index = 0; index < elements.length; index ++) {
|
for (let elt of elts) {
|
||||||
|
let value = await elt.evaluate(el => el.textContent, element);
|
||||||
|
if (value !== "") {
|
||||||
|
await elt.evaluate(el => el.innerHTML = '<span>' + el.innerHTML + '</span>');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let parent = parents[index];
|
// Recursive function to analyse an HTML element.
|
||||||
let element = elements[index];
|
// The output is a nested hierarchy: each analysed element lists its analysed children.
|
||||||
|
async function analyseElement(element) {
|
||||||
|
// Get some information on the element
|
||||||
|
let tagAttr = await element.getProperty('tagName');
|
||||||
|
let tagName = await tagAttr.jsonValue();
|
||||||
|
|
||||||
|
let classAttr = await element.getProperty('className');
|
||||||
|
let className = await classAttr.jsonValue();
|
||||||
|
|
||||||
|
let textContent = await element.evaluate(el => el.textContent, element);
|
||||||
|
|
||||||
let box = await element.boundingBox();
|
let box = await element.boundingBox();
|
||||||
|
|
||||||
let classElement = shouldCreateSpan ? parent : element;
|
// Register it into the return value
|
||||||
let classNameAttr = await classElement.getProperty('className');
|
let analyse = {};
|
||||||
let className = await classNameAttr.jsonValue();
|
analyse.tag = tagName;
|
||||||
|
analyse.class = className;
|
||||||
|
analyse.box = box;
|
||||||
|
analyse.children = [];
|
||||||
|
|
||||||
// Scale the bounding box
|
// Extract the text content if it is a span (we made those spans by ourselves in the addSpan function)
|
||||||
box.x /= size.width;
|
if (tagName === 'SPAN' && textContent !== "") {
|
||||||
box.width /= size.width;
|
analyse.text = textContent;
|
||||||
box.y /= size.height;
|
|
||||||
box.height /= size.height;
|
|
||||||
|
|
||||||
// Give the selector as type
|
|
||||||
box.type = selector;
|
|
||||||
|
|
||||||
if (className !== "") {
|
|
||||||
box.class = className;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
info.push(box);
|
|
||||||
|
// Select the children of this HTML element.
|
||||||
|
let children = await element.$$('> *');
|
||||||
|
|
||||||
|
for (let child of children) {
|
||||||
|
// Recursively analyse the children
|
||||||
|
analyse.children.push(await analyseElement(child));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
return analyse;
|
||||||
|
|
||||||
// Sort the info by y and then x (top to bottom, then left to right)
|
|
||||||
info.sort((a, b) => {
|
|
||||||
if (a.y < b.y || (a.y == b.y && a.x < b.x)) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (a.y > b.y || (a.y == b.y && a.x > b.x)) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Pretty print the output info
|
|
||||||
console.log(JSON.stringify(info, undefined, 4));
|
|
||||||
|
|
||||||
await browser.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
main();
|
main();
|
||||||
|
|
89
index2.js
89
index2.js
|
@ -1,89 +0,0 @@
|
||||||
#!/usr/bin/env node
|
|
||||||
|
|
||||||
const fs = require('fs').promises;
|
|
||||||
const process = require('process');
|
|
||||||
const puppeteer = require('puppeteer');
|
|
||||||
|
|
||||||
// Size of the rendering of the web page
|
|
||||||
const size = { width: 1280, height: 720 };
|
|
||||||
|
|
||||||
async function main() {
|
|
||||||
|
|
||||||
if (process.argv[2] === undefined) {
|
|
||||||
console.error("This program expects an argument.");
|
|
||||||
console.error("USAGE: locator <path-to-HTML-file>");
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Path to the HTML file to analyse (given as relative path from current directory)
|
|
||||||
// We need the full path so that puppeteer is able to access it
|
|
||||||
const path = process.argv[2].startsWith('/') ? process.argv[2] : process.cwd() + '/' + process.argv[2];
|
|
||||||
|
|
||||||
// Check that the file exists
|
|
||||||
try {
|
|
||||||
await fs.access(path, fs.constants.F_OK);
|
|
||||||
} catch (e) {
|
|
||||||
console.error("No such file: " + path);
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize browser
|
|
||||||
const browser = await puppeteer.launch();
|
|
||||||
const page = await browser.newPage();
|
|
||||||
await page.setViewport(size);
|
|
||||||
await page.goto("file://" + path);
|
|
||||||
|
|
||||||
let currentSlide = 1;
|
|
||||||
let hierarchy = [];
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
let root = await page.$("#\\3" + currentSlide);
|
|
||||||
|
|
||||||
if (root === null) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
let currentInfo = {};
|
|
||||||
hierarchy.push(currentInfo);
|
|
||||||
await analyseElement(root, currentInfo);
|
|
||||||
|
|
||||||
currentSlide++;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(JSON.stringify(hierarchy, undefined, 4));
|
|
||||||
|
|
||||||
await browser.close();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async function analyseElement(element, hierarchy, tabs = '', stop = false) {
|
|
||||||
let tagAttr = await element.getProperty("tagName");
|
|
||||||
let tagName = await tagAttr.jsonValue();
|
|
||||||
|
|
||||||
let classAttr = await element.getProperty("className");
|
|
||||||
let className = await classAttr.jsonValue();
|
|
||||||
|
|
||||||
let box = await element.boundingBox();
|
|
||||||
hierarchy.tag = tagName;
|
|
||||||
hierarchy.class = className;
|
|
||||||
hierarchy.x = box.x / size.width;
|
|
||||||
hierarchy.width = box.width / size.width;
|
|
||||||
hierarchy.y = box.y / size.height;
|
|
||||||
hierarchy.height = box.height / size.height;
|
|
||||||
hierarchy.children = [];
|
|
||||||
|
|
||||||
console.log(tabs + tagName + ' "' + className + '" ' + JSON.stringify(box));
|
|
||||||
|
|
||||||
let children = await element.$$('> *');
|
|
||||||
|
|
||||||
for (let child of children) {
|
|
||||||
let currentInfo = {};
|
|
||||||
hierarchy.children.push(currentInfo);
|
|
||||||
await analyseElement(child, currentInfo, tabs + ' ', true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
main();
|
|
Loading…
Reference in New Issue