Extract tags

This commit is contained in:
Thomas Forgione 2022-02-21 21:11:31 +01:00
parent ba8b57e5e4
commit 7fd7b56f9a
7 changed files with 277 additions and 32 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
node_modules
elm-stuff
target
js/*

View File

@ -30,11 +30,11 @@ target/debug/elmojinput: js/main.js src/**
target/release/elmojinput: js/main.min.js src/**
cargo build --release
js/emoji.txt:
@curl https://unicode.org/Public/emoji/14.0/emoji-test.txt -o $(BUILD_DIR)/emoji.txt
js/emoji.html:
@curl https://unicode.org/emoji/charts/emoji-list.html -o $(BUILD_DIR)/emoji.html
elm/Emoji.elm: js/emoji.txt extract.js
elm/Emoji.elm: js/emoji.html extract.js
@node extract.js > elm/Emoji.elm
clean:
@rm -rf $(BUILD_DIR)/{main.js,main.min.js,emoji.txt} elm/Emoji.elm
@rm -rf $(BUILD_DIR)/{main.js,main.min.js,emoji.html} elm/Emoji.elm

View File

@ -130,8 +130,6 @@ header model =
else
Border.width 1
, Border.rounded 5
, Element.width Element.fill
, Element.height Element.fill
]
{ label = Element.el [ Element.centerX, Element.centerY ] (Element.text (Emoji.categoryEmoji x).unicode)
, onPress = Just (CategoryClicked x)
@ -205,8 +203,8 @@ port copy : String -> Cmd msg
-- UTILS --
minimum : ( comparable, List comparable ) -> comparable
minimum ( h, t ) =
minimum : comparable -> List comparable -> comparable
minimum h t =
case List.minimum t of
Nothing ->
h
@ -215,8 +213,8 @@ minimum ( h, t ) =
min h v
maximum : ( comparable, List comparable ) -> comparable
maximum ( h, t ) =
maximum : comparable -> List comparable -> comparable
maximum h t =
case List.maximum t of
Nothing ->
h

View File

@ -1,35 +1,50 @@
const fs = require('fs');
const decode = require('decode-html');
const { parse } = require('node-html-parser');
/**
 * Render one emoji record as the source text of an Elm record literal.
 *
 * @param {{ name: string, unicode: string, tags: string[] }} emoji - Emoji description.
 * @returns {string} Text of the form
 *     `{ name = "...", unicode = "...", tags = [ "...", "..." ] }`.
 */
function formatEmoji(emoji) {
    // A raw backslash or double quote would corrupt or terminate the Elm
    // string literal, so both are escaped before the value is quoted.
    const quote = (text) => '"' + String(text).replace(/\\/g, '\\\\').replace(/"/g, '\\"') + '"';

    // A record without tags yields an empty Elm list instead of throwing.
    const tags = (emoji.tags || []).map(quote).join(', ');

    // Single return: the tags field is always part of the emitted record,
    // matching the `tags : List String` field of the generated Emoji alias.
    return (
        '{ name = ' + quote(emoji.name) +
        ', unicode = ' + quote(emoji.unicode) +
        ', tags = [ ' + tags +
        ' ] }'
    );
}
let text = fs.readFileSync('js/emoji.txt', 'utf-8');
let html = parse(fs.readFileSync('js/emoji.html', 'utf-8'));
let table = html.getElementsByTagName('table')[0];
let emojis = {};
let currentEmojis = null;
for (let line of text.split('\n')) {
if (line.startsWith('#')) {
if (line.startsWith('# group:')) {
let name = line.split(':')[1].split(' ')[1].toLowerCase();
emojis[name] = [];
currentEmojis = emojis[name];
for (let element of table.childNodes) {
// Skip text
if (element.nodeType === 3) continue;
if (element.rawTagName === 'tr') {
let cols = element.childNodes.filter(x => x.nodeType !== 3);
let firstCol = cols[0];
if (firstCol.classList.contains('bighead')) {
let name = decode(firstCol.childNodes.filter(x => x.nodeType !== 3)[0].childNodes[0]._rawText)
.split(' ')[0]
.toLowerCase();
currentEmojis = [];
emojis[name] = currentEmojis;
}
continue;
}
if (line.length === 0) {
continue;
}
if (firstCol.rawTagName === 'td') {
// We're in an emoji line
let name = decode(cols[3].childNodes[0]._rawText);
let tags = decode(cols[4].childNodes[0]._rawText).split('|').map(x => x.trim());
let unicode = cols[2].childNodes[0].childNodes[0].rawAttrs.split("'")[1];
currentEmojis.push({
unicode, name, tags
});
}
let end = line.split('#')[1];
let split = end.split(' ');
let emoji = split[1];
let name = split.slice(3).join(' ');
currentEmojis.push({
name,
unicode: emoji,
});
}
}
delete(emojis.component);
@ -42,6 +57,7 @@ console.log('\n');
// Print the Elm `Emoji` record alias one line at a time, then a blank separator.
const emojiAliasLines = [
    'type alias Emoji =',
    ' { name : String',
    ' , unicode : String',
    ' , tags : List String',
    ' }',
];
for (const aliasLine of emojiAliasLines) {
    console.log(aliasLine);
}
console.log('\n');

1
js/main.min.js vendored

File diff suppressed because one or more lines are too long

225
package-lock.json generated Normal file
View File

@ -0,0 +1,225 @@
{
"name": "emojinput",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"dependencies": {
"decode-html": "^2.0.0",
"node-html-parser": "^5.2.0"
}
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
},
"node_modules/css-select": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-4.2.1.tgz",
"integrity": "sha512-/aUslKhzkTNCQUB2qTX84lVmfia9NyjP3WpDGtj/WxhwBzWBYUV3DgUpurHTme8UTPcPlAD1DJ+b0nN/t50zDQ==",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^5.1.0",
"domhandler": "^4.3.0",
"domutils": "^2.8.0",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-5.1.0.tgz",
"integrity": "sha512-arSMRWIIFY0hV8pIxZMEfmMI47Wj3R/aWpZDDxWYCPEiOMv6tfOrnpDtgxBYPEQD4V0Y/958+1TdC3iWTFcUPw==",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/decode-html": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/decode-html/-/decode-html-2.0.0.tgz",
"integrity": "sha1-fQqIfORCgOYJeKcH67f4CB/WHqo="
},
"node_modules/dom-serializer": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.3.2.tgz",
"integrity": "sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==",
"dependencies": {
"domelementtype": "^2.0.1",
"domhandler": "^4.2.0",
"entities": "^2.0.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.2.0.tgz",
"integrity": "sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
]
},
"node_modules/domhandler": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.0.tgz",
"integrity": "sha512-fC0aXNQXqKSFTr2wDNZDhsEYjCiYsDWl3D01kwt25hm1YIPyDGHvvi3rw+PLqHAl/m71MaiF7d5zvBr0p5UB2g==",
"dependencies": {
"domelementtype": "^2.2.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz",
"integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==",
"dependencies": {
"dom-serializer": "^1.0.1",
"domelementtype": "^2.2.0",
"domhandler": "^4.2.0"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/entities": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz",
"integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==",
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/he": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
"bin": {
"he": "bin/he"
}
},
"node_modules/node-html-parser": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-5.2.0.tgz",
"integrity": "sha512-fmiwLfQu+J2A0zjwSEkztSHexAf5qq/WoiL/Hgo1K7JpfEP+OGWY5maG0kGaM+IFVdixF/1QbyXaQ3h4cGfeLw==",
"dependencies": {
"css-select": "^4.1.3",
"he": "1.2.0"
}
},
"node_modules/nth-check": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz",
"integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
}
},
"dependencies": {
"boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24="
},
"css-select": {
"version": "4.2.1",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-4.2.1.tgz",
"integrity": "sha512-/aUslKhzkTNCQUB2qTX84lVmfia9NyjP3WpDGtj/WxhwBzWBYUV3DgUpurHTme8UTPcPlAD1DJ+b0nN/t50zDQ==",
"requires": {
"boolbase": "^1.0.0",
"css-what": "^5.1.0",
"domhandler": "^4.3.0",
"domutils": "^2.8.0",
"nth-check": "^2.0.1"
}
},
"css-what": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-5.1.0.tgz",
"integrity": "sha512-arSMRWIIFY0hV8pIxZMEfmMI47Wj3R/aWpZDDxWYCPEiOMv6tfOrnpDtgxBYPEQD4V0Y/958+1TdC3iWTFcUPw=="
},
"decode-html": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/decode-html/-/decode-html-2.0.0.tgz",
"integrity": "sha1-fQqIfORCgOYJeKcH67f4CB/WHqo="
},
"dom-serializer": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.3.2.tgz",
"integrity": "sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==",
"requires": {
"domelementtype": "^2.0.1",
"domhandler": "^4.2.0",
"entities": "^2.0.0"
}
},
"domelementtype": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.2.0.tgz",
"integrity": "sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A=="
},
"domhandler": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.0.tgz",
"integrity": "sha512-fC0aXNQXqKSFTr2wDNZDhsEYjCiYsDWl3D01kwt25hm1YIPyDGHvvi3rw+PLqHAl/m71MaiF7d5zvBr0p5UB2g==",
"requires": {
"domelementtype": "^2.2.0"
}
},
"domutils": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz",
"integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==",
"requires": {
"dom-serializer": "^1.0.1",
"domelementtype": "^2.2.0",
"domhandler": "^4.2.0"
}
},
"entities": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz",
"integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A=="
},
"he": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw=="
},
"node-html-parser": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-5.2.0.tgz",
"integrity": "sha512-fmiwLfQu+J2A0zjwSEkztSHexAf5qq/WoiL/Hgo1K7JpfEP+OGWY5maG0kGaM+IFVdixF/1QbyXaQ3h4cGfeLw==",
"requires": {
"css-select": "^4.1.3",
"he": "1.2.0"
}
},
"nth-check": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz",
"integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==",
"requires": {
"boolbase": "^1.0.0"
}
}
}
}

6
package.json Normal file
View File

@ -0,0 +1,6 @@
{
"dependencies": {
"decode-html": "^2.0.0",
"node-html-parser": "^5.2.0"
}
}