This commit is contained in:
Thomas Forgione 2023-02-20 11:36:48 +01:00
parent 58afd8ac3e
commit 38d5cb9ed0
4 changed files with 312 additions and 33 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
node_modules
run.sh

265
README.md
View File

@ -33,54 +33,257 @@ node index.js <path-to-the-HTML-file>
## Example
```sh
locator 10.html
locator example.html
```
Output:
```json
{
"tag": "SECTION",
"class": "",
"box": {
"x": 0,
"y": 0,
"width": 1,
"height": 1
},
"children": [
{
"tag": "H1",
"class": "",
"box": {
"x": 0.0546875,
"y": 0.09722222222222222,
"width": 0.890625,
"height": 0.11812065972222222
},
"children": [
{
"tag": "SPAN",
"class": "sized-span",
"box": {
"x": 0.0546875,
"y": 0.10416666666666667,
"width": 0.50234375,
"height": 0.10416666666666667
},
"children": [],
"text": "The painting wanders"
}
],
"text": null
},
{
"tag": "P",
"class": "",
"box": {
"x": 0.0546875,
"y": 0.26395399305555556,
"width": 0.890625,
"height": 0.065625
},
"children": [
{
"tag": "SPAN",
"class": "sized-span",
"box": {
"x": 0.0546875,
"y": 0.2667317708333333,
"width": 0.43125,
"height": 0.058333333333333334
},
"children": [],
"text": "The dramatic trek wraps workout."
}
],
"text": null
},
{
"tag": "UL",
"class": "",
"box": {
"x": 0.0546875,
"y": 0.37819010416666665,
"width": 0.890625,
"height": 0.22604166666666667
},
"children": [
{
"tag": "LI",
"class": "",
"box": {
"x": 0.0859375,
"y": 0.37819010416666665,
"width": 0.859375,
"height": 0.065625
},
"children": [
{
"tag": "SPAN",
"class": "sized-span",
"box": {
"x": 0.0859375,
"y": 0.3809678819444444,
"width": 0.5876953125,
"height": 0.058333333333333334
},
"children": [],
"text": "The wrong candidate sublets anesthesiologist."
}
],
"text": null
},
{
"tag": "LI",
"class": "",
"box": {
"x": 0.0859375,
"y": 0.4583984375,
"width": 0.859375,
"height": 0.065625
},
"children": [
{
"tag": "SPAN",
"class": "sized-span",
"box": {
"x": 0.0859375,
"y": 0.4611762152777778,
"width": 0.4072265625,
"height": 0.058333333333333334
},
"children": [],
"text": "The late hydrocarb slides violet."
}
],
"text": null
},
{
"tag": "LI",
"class": "",
"box": {
"x": 0.0859375,
"y": 0.5386067708333333,
"width": 0.859375,
"height": 0.065625
},
"children": [
{
"tag": "SPAN",
"class": "sized-span",
"box": {
"x": 0.0859375,
"y": 0.5413845486111111,
"width": 0.3783203125,
"height": 0.058333333333333334
},
"children": [],
"text": "The rich arthur forms zombie."
}
],
"text": null
}
],
"text": null
}
],
"text": null
}
```
You can also flatten the output:
```sh
locator example.html --flatten
```
Output:
```json
[
{
"x": 0.0546875,
"y": 0.09722222222222222,
"width": 0.890625,
"height": 0.11812065972222222,
"type": "h1"
"tag": "SECTION",
"class": "",
"box": {
"x": 0,
"y": 0,
"width": 1,
"height": 1
},
"text": null
},
{
"x": 0.0546875,
"y": 0.26395399305555556,
"width": 0.890625,
"height": 0.065625,
"type": "p"
"tag": "H1",
"class": "sized-span",
"box": {
"x": 0.0546875,
"y": 0.10416666666666667,
"width": 0.50234375,
"height": 0.10416666666666667
},
"children": [],
"text": "The painting wanders"
},
{
"x": 0.0546875,
"y": 0.37819010416666665,
"width": 0.890625,
"height": 0.22604166666666667,
"type": "ul"
"tag": "P",
"class": "sized-span",
"box": {
"x": 0.0546875,
"y": 0.2667317708333333,
"width": 0.43125,
"height": 0.058333333333333334
},
"children": [],
"text": "The dramatic trek wraps workout."
},
{
"x": 0.0859375,
"y": 0.37819010416666665,
"width": 0.859375,
"height": 0.065625,
"type": "li"
"tag": "UL",
"class": "",
"box": {
"x": 0.0546875,
"y": 0.37819010416666665,
"width": 0.890625,
"height": 0.22604166666666667
},
"text": null
},
{
"x": 0.0859375,
"y": 0.4583984375,
"width": 0.859375,
"height": 0.065625,
"type": "li"
"tag": "LI",
"class": "sized-span",
"box": {
"x": 0.0859375,
"y": 0.3809678819444444,
"width": 0.5876953125,
"height": 0.058333333333333334
},
"children": [],
"text": "The wrong candidate sublets anesthesiologist."
},
{
"x": 0.0859375,
"y": 0.5386067708333333,
"width": 0.859375,
"height": 0.065625,
"type": "li"
"tag": "LI",
"class": "sized-span",
"box": {
"x": 0.0859375,
"y": 0.4611762152777778,
"width": 0.4072265625,
"height": 0.058333333333333334
},
"children": [],
"text": "The late hydrocarb slides violet."
},
{
"tag": "LI",
"class": "sized-span",
"box": {
"x": 0.0859375,
"y": 0.5413845486111111,
"width": 0.3783203125,
"height": 0.058333333333333334
},
"children": [],
"text": "The rich arthur forms zombie."
}
]
```

22
example.html Normal file

File diff suppressed because one or more lines are too long

View File

@ -37,6 +37,12 @@ async function main() {
// We don't take the other slides into account because they would interfere with our screenshot verification
let root = await page.$('#\\31');
// If there is no slide, this is not a marp HTML file: report the error and exit
if (root === null) {
console.error('Not a marp HTML file.');
process.exit(1);
}
// Take a first screenshot
await page.screenshot({path: __dirname + '/' + 'screenshot1.png'});
@ -58,6 +64,11 @@ async function main() {
// Analyse the root and output the result
let analyse = await analyseElement(root);
if (process.argv[3] === '--flatten') {
analyse = flatten(analyse);
}
console.log(JSON.stringify(analyse, undefined, 4));
await browser.close();
@ -71,9 +82,17 @@ async function addSpan(element) {
for (let elt of elts) {
let value = await elt.evaluate(el => el.textContent, element);
if (value !== "") {
await elt.evaluate(el => el.innerHTML = '<span>' + el.innerHTML + '</span>');
await elt.evaluate(el => {
if (el.innerHTML.indexOf('<li>') === -1) {
el.innerHTML = '<span class="sized-span">' + el.innerHTML + '</span>';
return true;
} else {
return false;
}
});
}
}
}
// Recursive function to analyse an HTML element.
@ -95,14 +114,19 @@ async function analyseElement(element) {
analyse.tag = tagName;
analyse.class = className;
analyse.box = box;
box.x /= size.width;
box.width /= size.width;
box.y /= size.height;
box.height /= size.height;
analyse.children = [];
// Extract the text content if it is a span (we made those spans by ourselves in the addSpan function)
if (tagName === 'SPAN' && textContent !== "") {
analyse.text = textContent;
} else {
analyse.text = null;
}
// Select the children of this HTML element.
let children = await element.$$('> *');
@ -114,6 +138,35 @@ async function analyseElement(element) {
return analyse;
}
/**
 * Flattens an analysis tree into a list, visiting nodes depth-first in
 * pre-order (a node always comes before its descendants).
 *
 * Each emitted entry is a shallow copy of the node without its `children`
 * key. The one exception is a node whose only child is a SPAN carrying the
 * "sized-span" class: such a node is replaced by a copy of that span, renamed
 * with the node's own tag. That copy keeps every other field of the span
 * (class, box, text and its own `children`, which are not flattened).
 *
 * The input tree is never modified; emitted entries are fresh objects that
 * only share nested values (such as `box`) with the input.
 *
 * @param {object} input - Root of the (sub)tree to flatten. Expected to have
 *   `tag`, `class` and `children`; a missing or non-array `children` is
 *   treated as "no children".
 * @param {object[]} [acc=[]] - Accumulator the entries are appended to.
 * @returns {object[]} The accumulator `acc`, for convenience.
 */
function flatten(input, acc = []) {
    // Split the node from its children without touching the caller's object.
    const { children, ...node } = input;
    const kids = Array.isArray(children) ? children : [];
    const only = kids.length === 1 ? kids[0] : null;

    // A lone sized span stands in for its parent: it has the tight text box,
    // but it must be reported under the parent's tag.
    if (only !== null && only.tag === "SPAN" && only.class.includes("sized-span")) {
        acc.push({ ...only, tag: input.tag });
        return acc;
    }

    // General case (also covers leaves, for which the loop does nothing).
    acc.push(node);
    for (const kid of kids) {
        flatten(kid, acc);
    }
    return acc;
}
main();