Skip to content

Commit

Permalink
Added tweet translation mechanism (#96)
Browse files Browse the repository at this point in the history
* Added tweet translation mechanism.

* Removed test translation.

It was done via Google Translate which I don't trust for this application.
  • Loading branch information
gjvnq authored Jun 1, 2022
1 parent 3687239 commit 93743fc
Show file tree
Hide file tree
Showing 15 changed files with 1,361 additions and 44 deletions.
11 changes: 11 additions & 0 deletions build/cache.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ module.exports = exports = class Manifest {
}

async get (task) {
if (task === undefined || task === null) {
console.error(task);
throw new Error('Task action is undefined or null.');
return;
}
if (task.input === undefined || task.input === null) {
console.error(task);
throw new Error('Task action is missing input. (tip: remove `twitter-cache.json` and run `gulp` again)');
return;
}

const hash = this.hash(task);
const { input, output, cache: altCachePath } = task;
const ext = path.extname(task.output);
Expand Down
138 changes: 131 additions & 7 deletions build/engines.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

const path = require('path');

const fs = require('fs-extra');
Expand All @@ -16,6 +15,18 @@ const i18n = require('./lang');

const mAnchor = require('markdown-it-anchor');

const dateFNS = require('date-fns');
const dateFNSLocales = require('date-fns/locale');
// Lookup table from a short language code to the date-fns locale object
// used when formatting dates for that language.
const str2locale = {
  en: dateFNSLocales.enUS,
  zh: dateFNSLocales.zhCN,
  de: dateFNSLocales.de,
  fr: dateFNSLocales.fr,
  hu: dateFNSLocales.hu,
  pl: dateFNSLocales.pl,
  es: dateFNSLocales.es,
};

const markdownEngines = {
full: markdownIt({
html: true,
Expand Down Expand Up @@ -181,12 +192,14 @@ class Injectables {

helpers () {
return {
import: this.import(),
markdown: this.markdown(),
icon: this.icon(),
prod: this.production(),
rev: this.rev(),
lang: this.lang(),
import: this.import(),
markdown: this.markdown(),
icon: this.icon(),
coalesce: this.coalesce(),
prod: this.production(),
rev: this.rev(),
lang: this.lang(),
date: this.date(),
};
}

Expand Down Expand Up @@ -281,4 +294,115 @@ class Injectables {
};
}

// Given a list of arguments, returns the firt that isn't undefined
coalesce () {
return function (...raw_args) {
const { arguments: args } = raw_args.pop();
for (let arg in args) {
if (args[arg] !== undefined) {
return args[arg];
}
}
return undefined;
};
}

// Multi tool for printing dates
//
// {{date}} -> prints current date
// {{date datestr}} -> prints date in datestr
// {{date datestr datefmt}} -> prints date in datestr in format datefmt
// {{date datestr datefmt lang}} -> prints date in datestr in format datefmt according to conventions for language lang
//
// Datestr can be the string "now", `undefined`, and anything parsable by `new Date()`.
//
// If lang is not specified, it will be extracted from the page metadata. If that is not available, English will be assumed.
// In case of errors, the date will be returned as an ISO string if possible and its raw datestr input otherwise.
// Datefmt format is available at https://date-fns.org/v2.25.0/docs/format
//
// Common formats:
// - "h:mm aa - EEE, LLL do, yyyy" = 12 hour clock, e.g. '1:28 PM - Sat, Feb 15th, 2020' (en) or '1:28 PM - sam., 15/févr./2020' (fr)
// - "hh:mm - EEE, LLL do, yyyy" = 24 hour clock, e.g. '13:28 - Sat, Feb 15th, 2020' (en) or '13:28 - sam., 15/févr./2020' (fr)
// - "yyyy-MM-dd'T'HH:mm:ss.SSSXXX" or "iso" = ISO 8601 format, e.g. '2020-02-15T13:28:02.000Z'
date () {
return function (...args) {
let extra = args.pop();
let datestr, dateobj, datefmt, lang;

const { resolve: rval } = extra;
const filename = rval('@value.input');
lang = (rval('@root.this.page.lang') || 'en').split('-')[0];

switch (args.length) {
case 0:
datestr = "now";
break;
case 1:
datestr = args[0];
break;
case 2:
datestr = args[0];
datefmt = args[1];
break;
case 3:
datestr = args[0];
datefmt = args[1];
lang = args[2];
break;
default:
throw new Exception('wrong number of arguments for {{date}}, got '+args.length+' maximum is 3');
}

if (datestr === "now" || datestr === undefined) {
dateobj = new Date();
} else {
dateobj = new Date(datestr);
}

if (!dateFNS.isValid(dateobj)) {
console.trace('Invalid input for date: ', { datestr, filename, args, extra });
return datestr.toString();
}

if (datefmt == "iso") {
return dateobj.toISOString();
}

if (lang === undefined) {
return dateobj.toISOString();
}

const locale = str2locale[lang];
if (locale === undefined) {
console.warn('Locale not found: '+lang);
}
if (datefmt === undefined || locale === undefined) {
const options = {
weekday: 'short',
year: 'numeric',
month: 'short',
day: 'numeric',
timeZone: 'UTC',
timeZoneName: 'short',
hour: '2-digit',
minute: '2-digit',
second: '2-digit'
};
try {
return dateobj.toLocaleString(lang, options);
} catch (error) {
console.trace('Something went horribly wrong while formating dates.', { error, filename, args, extra });
return dateobj.toISOString();
}
}

try {
return dateFNS.format(dateobj, datefmt, {locale: locale});
} catch (error) {
console.trace('Something went horribly wrong while formating dates.', { error, filename, args, extra });
return dateobj.toISOString();
}
};
}

}
26 changes: 26 additions & 0 deletions build/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const { sortBy } = require('lodash');

const getEngines = require('./engines');
const primeTweets = require('./page-tweets');
const i18nTweets = require('./page-tweets').i18n;
const pageWriter = require('./page-writer');
const pageConcatinator = require('./page-concatinator');
const evaluate = require('./evaluate');
Expand Down Expand Up @@ -52,6 +53,9 @@ exports.everything = function (prod = false) {
posts = sortBy(posts, 'date');
posts.reverse();

// Process i18n for tweets
await i18nTweets();

const assets = [ ...PostFiles.assets, ...PublicFiles.assets ];

const [ tasks ] = await Promise.all([
Expand Down Expand Up @@ -126,6 +130,28 @@ exports.pages = function () {
return fn;
};

let twitterProcessing = false;

exports.twitter = function () {
async function fn () {
if (twitterProcessing) {
return;
}

twitterProcessing = true;
try {
await i18nTweets();
} catch (exception_var) {
twitterProcessing = false;
throw exception_var;
}
twitterProcessing = false;
}

fn.displayName = 'buildTwitter';
return fn;
};

exports.task = function (action, prod = false) {
const fn = async () => {
const tasks = await {
Expand Down
79 changes: 70 additions & 9 deletions build/lib/tweetparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ const schema = {
protected: true,
},
html: true,
html_i18n: true,
full_text: true,
full_text_i18n: true,
quoted_status_id_str: true,
entities: { media: [ {
type: true,
Expand All @@ -31,6 +34,12 @@ var entityProcessors = {
hashtags (tags, tweet) {
tags.forEach((tagObj) => {
tweet.html = tweet.html.replace('#' + tagObj.text, `<a href="https://twitter.com/hashtag/{tagObj.text}" class="hashtag">#${tagObj.text}</a>`);
if (tweet.html_i18n !== undefined) {
const langs = Object.keys(tweet.html_i18n);
for (const lang of langs) {
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace('#' + tagObj.text, `<a href="https://twitter.com/hashtag/{tagObj.text}" class="hashtag">#${tagObj.text}</a>`);
}
}
});
},

Expand All @@ -41,21 +50,41 @@ var entityProcessors = {
user_mentions (users, tweet) {
users.forEach((userObj) => {
var regex = new RegExp('@' + userObj.screen_name, 'gi' );
tweet.html = tweet.html.replace(regex, `<a href="https://twitter.com/${userObj.screen_name}" class="mention">@${userObj.screen_name}</a>`);
const mention_html = `<a href="https://twitter.com/${userObj.screen_name}" class="mention">@${userObj.screen_name}</a>`;
tweet.html = tweet.html.replace(regex, mention_html);
if (tweet.html_i18n !== undefined) {
const langs = Object.keys(tweet.html_i18n);
for (const lang of langs) {
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace(regex, mention_html);
}
}
});
},

urls (urls, tweet) {
urls.forEach(({ url, expanded_url, display_url }) => {
const isQT = tweet.quoted_status_permalink && url === tweet.quoted_status_permalink.url;
const className = isQT ? 'quoted-tweet' : 'url';
tweet.html = tweet.html.replace(url, isQT ? '' : `<a href="${expanded_url}" class="${className}">${display_url}</a>`);
const fancy_html = `<a href="${expanded_url}" class="${className}">${display_url}</a>`;
tweet.html = tweet.html.replace(url, isQT ? '' : fancy_html);
if (tweet.html_i18n !== undefined) {
const langs = Object.keys(tweet.html_i18n);
for (const lang of langs) {
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace(url, isQT ? '' : fancy_html);
}
}
});
},

media (media, tweet) {
media.forEach((m) => {
tweet.html = tweet.html.replace(m.url, '');
if (tweet.html_i18n !== undefined) {
const langs = Object.keys(tweet.html_i18n);
for (const lang of langs) {
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace(m.url, '');
}
}
let width, height;

if (has(m, 'video_info.aspect_ratio')) {
Expand Down Expand Up @@ -90,6 +119,13 @@ var entityProcessors = {
module.exports = exports = function (tweets) {
return tweets.length ? tweets.map(parseTweet) : parseTweet(tweets);

// Converts raw tweet text into HTML paragraphs.
//
// The text is cut at every run of line breaks; each piece that contains
// something other than whitespace is wrapped, untrimmed, in <p>...</p>, and
// the paragraphs are concatenated without separators. Text with no visible
// content produces an empty string.
function parseStep1 (text) {
  const paragraphs = [];
  // The capture group makes split() also return the separators; they consist
  // of whitespace only and are dropped by the same check as blank lines.
  for (const piece of text.split(/(\r\n|\n\r|\r|\n)+/)) {
    if (piece.trim()) {
      paragraphs.push('<p>' + piece + '</p>');
    }
  }
  return paragraphs.join('');
}

function parseTweet (tweet) {
// clone the tweet so we're not altering the original
tweet = JSON.parse(JSON.stringify(tweet));
Expand All @@ -105,11 +141,29 @@ module.exports = exports = function (tweets) {
];

// Copying text value to a new property html. The final output will be set to this property
tweet.html = (tweet.full_text || tweet.text)
.split(/(\r\n|\n\r|\r|\n)+/)
.map((s) => s.trim() && '<p>' + s + '</p>')
.filter(Boolean)
.join('');
if (tweet.full_text !== undefined || tweet.text !== undefined) {
tweet.html = parseStep1(tweet.full_text || tweet.text);
}
if (tweet.html_i18n === undefined) {
tweet.html_i18n = {};
}
if (tweet.full_text_i18n === undefined) {
tweet.full_text_i18n = {};
}

// Find which languages we actually have translations for
const possible_langs = Object.keys(tweet.full_text_i18n);
const langs = [];
for (const lang of possible_langs) {
const trimed = tweet.full_text_i18n[lang].trim();
if (trimed.length > 0) {
langs.push(lang);
}
}

for (const lang of langs) {
tweet.html_i18n[lang] = parseStep1(tweet.full_text_i18n[lang]);
}

if (tweet.quoted_status) {
tweet.quoted_status = parseTweet(tweet.quoted_status);
Expand Down Expand Up @@ -160,8 +214,15 @@ module.exports = exports = function (tweets) {
}

// Process Emoji's
tweet.html = twemoji.parse(tweet.html);
tweet.user.name_html = twemoji.parse(tweet.user.name);
if (tweet.html) {
tweet.html = twemoji.parse(tweet.html);
}
for (const lang of langs) {
tweet.html_i18n[lang] = twemoji.parse(tweet.html_i18n[lang]);
}
if (tweet.user !== undefined && tweet.user.name !== undefined) {
tweet.user.name_html = twemoji.parse(tweet.user.name);
}

return deepPick(tweet, schema);
}
Expand Down
Loading

0 comments on commit 93743fc

Please sign in to comment.