diff --git a/.gitignore b/.gitignore index 3c3629e..0db01f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +run.sh diff --git a/README.md b/README.md index 7719a73..f094b9d 100644 --- a/README.md +++ b/README.md @@ -33,54 +33,257 @@ node index.js ## Example ```sh -locator 10.html +locator example.html ``` Output: +```json +{ + "tag": "SECTION", + "class": "", + "box": { + "x": 0, + "y": 0, + "width": 1, + "height": 1 + }, + "children": [ + { + "tag": "H1", + "class": "", + "box": { + "x": 0.0546875, + "y": 0.09722222222222222, + "width": 0.890625, + "height": 0.11812065972222222 + }, + "children": [ + { + "tag": "SPAN", + "class": "sized-span", + "box": { + "x": 0.0546875, + "y": 0.10416666666666667, + "width": 0.50234375, + "height": 0.10416666666666667 + }, + "children": [], + "text": "The painting wanders" + } + ], + "text": null + }, + { + "tag": "P", + "class": "", + "box": { + "x": 0.0546875, + "y": 0.26395399305555556, + "width": 0.890625, + "height": 0.065625 + }, + "children": [ + { + "tag": "SPAN", + "class": "sized-span", + "box": { + "x": 0.0546875, + "y": 0.2667317708333333, + "width": 0.43125, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The dramatic trek wraps workout." + } + ], + "text": null + }, + { + "tag": "UL", + "class": "", + "box": { + "x": 0.0546875, + "y": 0.37819010416666665, + "width": 0.890625, + "height": 0.22604166666666667 + }, + "children": [ + { + "tag": "LI", + "class": "", + "box": { + "x": 0.0859375, + "y": 0.37819010416666665, + "width": 0.859375, + "height": 0.065625 + }, + "children": [ + { + "tag": "SPAN", + "class": "sized-span", + "box": { + "x": 0.0859375, + "y": 0.3809678819444444, + "width": 0.5876953125, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The wrong candidate sublets anesthesiologist." 
+ } + ], + "text": null + }, + { + "tag": "LI", + "class": "", + "box": { + "x": 0.0859375, + "y": 0.4583984375, + "width": 0.859375, + "height": 0.065625 + }, + "children": [ + { + "tag": "SPAN", + "class": "sized-span", + "box": { + "x": 0.0859375, + "y": 0.4611762152777778, + "width": 0.4072265625, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The late hydrocarb slides violet." + } + ], + "text": null + }, + { + "tag": "LI", + "class": "", + "box": { + "x": 0.0859375, + "y": 0.5386067708333333, + "width": 0.859375, + "height": 0.065625 + }, + "children": [ + { + "tag": "SPAN", + "class": "sized-span", + "box": { + "x": 0.0859375, + "y": 0.5413845486111111, + "width": 0.3783203125, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The rich arthur forms zombie." + } + ], + "text": null + } + ], + "text": null + } + ], + "text": null +} +``` + +You can also flatten the output: + +```sh +locator example.html --flatten +``` + +Output: + ```json [ { - "x": 0.0546875, - "y": 0.09722222222222222, - "width": 0.890625, - "height": 0.11812065972222222, - "type": "h1" + "tag": "SECTION", + "class": "", + "box": { + "x": 0, + "y": 0, + "width": 1, + "height": 1 + }, + "text": null }, { - "x": 0.0546875, - "y": 0.26395399305555556, - "width": 0.890625, - "height": 0.065625, - "type": "p" + "tag": "H1", + "class": "sized-span", + "box": { + "x": 0.0546875, + "y": 0.10416666666666667, + "width": 0.50234375, + "height": 0.10416666666666667 + }, + "children": [], + "text": "The painting wanders" }, { - "x": 0.0546875, - "y": 0.37819010416666665, - "width": 0.890625, - "height": 0.22604166666666667, - "type": "ul" + "tag": "P", + "class": "sized-span", + "box": { + "x": 0.0546875, + "y": 0.2667317708333333, + "width": 0.43125, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The dramatic trek wraps workout." 
}, { - "x": 0.0859375, - "y": 0.37819010416666665, - "width": 0.859375, - "height": 0.065625, - "type": "li" + "tag": "UL", + "class": "", + "box": { + "x": 0.0546875, + "y": 0.37819010416666665, + "width": 0.890625, + "height": 0.22604166666666667 + }, + "text": null }, { - "x": 0.0859375, - "y": 0.4583984375, - "width": 0.859375, - "height": 0.065625, - "type": "li" + "tag": "LI", + "class": "sized-span", + "box": { + "x": 0.0859375, + "y": 0.3809678819444444, + "width": 0.5876953125, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The wrong candidate sublets anesthesiologist." }, { - "x": 0.0859375, - "y": 0.5386067708333333, - "width": 0.859375, - "height": 0.065625, - "type": "li" + "tag": "LI", + "class": "sized-span", + "box": { + "x": 0.0859375, + "y": 0.4611762152777778, + "width": 0.4072265625, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The late hydrocarb slides violet." + }, + { + "tag": "LI", + "class": "sized-span", + "box": { + "x": 0.0859375, + "y": 0.5413845486111111, + "width": 0.3783203125, + "height": 0.058333333333333334 + }, + "children": [], + "text": "The rich arthur forms zombie." } ] ``` + diff --git a/example.html b/example.html new file mode 100644 index 0000000..23efe80 --- /dev/null +++ b/example.html @@ -0,0 +1,22 @@ +
+

The painting wanders

+

The dramatic trek wraps workout.

+
    +
  • The wrong candidate sublets anesthesiologist.
  • +
  • The late hydrocarb slides violet.
  • +
  • The rich arthur forms zombie.
  • +
+
+
\ No newline at end of file diff --git a/index.js b/index.js index 6ffa309..a0eea3f 100644 --- a/index.js +++ b/index.js @@ -37,6 +37,12 @@ async function main() { // We don't take into account the other slides because it will mess up with our screenshot varification let root = await page.$('#\\31'); + // If there is no slide, this is not a Marp HTML file: report the error and exit + if (root === null) { + console.error('Not a marp HTML file.'); + process.exit(1); + } + // Take a first screenshot await page.screenshot({path: __dirname + '/' + 'screenshot1.png'}); @@ -58,6 +64,11 @@ async function main() { // Analyse the root and output the result let analyse = await analyseElement(root); + + if (process.argv[3] === '--flatten') { + analyse = flatten(analyse); + } + console.log(JSON.stringify(analyse, undefined, 4)); await browser.close(); @@ -71,9 +82,17 @@ async function addSpan(element) { for (let elt of elts) { let value = await elt.evaluate(el => el.textContent, element); if (value !== "") { - await elt.evaluate(el => el.innerHTML = '' + el.innerHTML + ''); + await elt.evaluate(el => { + if (el.innerHTML.indexOf('
  /**
   * Flattens an analysis tree into a list, in depth-first pre-order.
   *
   * Every node is emitted as a shallow copy without its `children` key, with one
   * exception: when a node's only child is a SPAN whose class list contains
   * `sized-span`, parent and span are collapsed into a single entry holding the
   * span's data (class, box, text) under the parent's tag. That collapsed entry
   * keeps an empty `children` array, which is the shape previously emitted for
   * such spans; any descendants of the span are appended to the list instead of
   * staying nested inside the entry.
   *
   * The input tree is never modified. Copies are shallow, so nested values such
   * as `box` are shared between the tree and the returned entries.
   *
   * @param {Object} input - Node to flatten; reads `tag`, `class` and `children`.
   *     A missing or non-array `children` is treated as "no children".
   * @param {Object[]} [acc=[]] - List the entries are appended to.
   * @returns {Object[]} `acc`, with the flattened entries appended.
   */
  function flatten(input, acc = []) {
      // Nothing to emit for an absent node.
      if (input == null) {
          return acc;
      }

      const children = Array.isArray(input.children) ? input.children : [];
      const onlyChild = children.length === 1 ? children[0] : null;

      if (isSizedSpan(onlyChild)) {
          // Collapse "element > sized span": keep the span's measurements and text,
          // but report them under the tag of the element the span wraps.
          const collapsed = Object.assign({}, onlyChild, { tag: input.tag, children: [] });
          acc.push(collapsed);

          // The span may itself wrap inline elements; keep the result flat.
          const nested = Array.isArray(onlyChild.children) ? onlyChild.children : [];
          for (const descendant of nested) {
              flatten(descendant, acc);
          }
          return acc;
      }

      // Regular node: emit a copy without `children`, then walk the children.
      const entry = Object.assign({}, input);
      delete entry.children;
      acc.push(entry);

      for (const child of children) {
          flatten(child, acc);
      }
      return acc;
  }

  // Tells whether a node is a SPAN whose class list contains the `sized-span` token.
  function isSizedSpan(node) {
      return node != null
          && node.tag === 'SPAN'
          // `class` may not be a string (nothing here guarantees it), so guard before splitting.
          && typeof node.class === 'string'
          && node.class.split(/\s+/).includes('sized-span');
  }