feat: add variations (#14)

exam-data · Sep 13, 2023 · 0680529 · 0680529
1 parent 67dfedd
commit 0680529
Show file tree

Hide file tree

Showing 12 changed files with 17,013 additions and 11,176 deletions.
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
 
 高频词汇的释义经过了人工初步校对，其他单词选取使用频率总和大于 50% 的释义（数据来自 [the little dict](http://louischeung.top:225/mdict%E8%AF%8D%E5%85%B8%E5%8C%85/The%20Little%20Dict/)），可以保证一定的准确性。减轻不必要的机械记忆负担。
 
-每个单词有异形词（即考纲当中有多种写法的单词）的，计划将其在后面列出，以保证原始数据的准确性。见[这个 PR](https://github.com/awxiaoxian2020/NETEMVocabulary/pull/14)。
+对于存在异形词（即考纲中有多种写法）的单词，其异形词会一并列出，以保证原始数据的准确性。目前已根据[这个数据](https://github.com/awxiaoxian2020/spelling-variations/blob/dev/src/bydictionary.json)进行了初步填充，后续有空再与考纲逐一校对。
 
 目前正在开发对应的跨端小程序，见 [develop 分支](https://github.com/awxiaoxian2020/NETEMVocabulary/tree/develop)。
 

diff --git a/scripts/.gitignore b/scripts/.gitignore
@@ -0,0 +1 @@
+**/node_modules/
diff --git a/scripts/spelling-variations/index.js b/scripts/spelling-variations/index.js
@@ -0,0 +1,66 @@
+import SpellingVariations from "./lib/index.js";
+import mysql from "mysql2";
+
+// Open the database connection.
+// NOTE(review): host and credentials are hard-coded for a local development
+// database; confirm this script is never pointed at a shared server.
+const connection = mysql.createConnection({
+  host: "127.0.0.1",
+  user: "root",
+  password: "root",
+  database: "netem",
+});
+
+// Connect eagerly so that a connection failure is reported on its own.
+connection.connect((err) => {
+  if (err) {
+    console.error("无法连接到数据库:", err);
+    return;
+  }
+  console.log("已成功连接到数据库");
+});
+
+// Read every word, compute its spelling variants and store them in the
+// `variant` column of the same row.
+connection.query("SELECT word FROM vocabulary", (err, results) => {
+  if (err) {
+    console.error("查询数据库时出错:", err);
+    // Release the socket on the failure path as well; otherwise the open
+    // connection keeps the Node.js process alive after the error.
+    connection.destroy();
+    return;
+  }
+
+  // Each row is expected to expose the selected `word` column.
+  for (const { word } of results) {
+    const result = new SpellingVariations(word).analyze();
+    if (!result.hasVariations) continue;
+
+    // De-duplicate and drop the word itself so only alternates are stored.
+    const uniqueVariants = [
+      ...new Set(result.variations.filter((variant) => variant !== word)),
+    ].join(", ");
+
+    // Nothing left after filtering: leave the existing `variant` value
+    // untouched instead of overwriting it with an empty string.
+    if (uniqueVariants === "") continue;
+
+    // Values are bound through placeholders, never concatenated into the SQL.
+    const updateQuery = `UPDATE \`vocabulary\` SET \`variant\` = ? WHERE \`word\` = ?`;
+    const query = connection.query(
+      updateQuery,
+      [uniqueVariants, word],
+      (updateErr) => {
+        if (updateErr) {
+          console.error(`更新单词 ${word} 的变体时出错: ${updateErr}`);
+        }
+      }
+    );
+    console.log('sql是',query.sql)
+  }
+
+  // Close the connection. Per the driver documentation, end() is queued
+  // behind the UPDATE statements issued above, so they still run first.
+  connection.end((endErr) => {
+    if (endErr) {
+      console.error("关闭数据库连接时出错:", endErr);
+    } else {
+      console.log("已成功关闭数据库连接");
+    }
+  });
+});
diff --git a/scripts/spelling-variations/lib/bypattern.js b/scripts/spelling-variations/lib/bypattern.js
@@ -114,12 +114,12 @@ const patterns = [
 	},
 ];
 
-module.exports = function (word) {
-	var pattern = patterns.find(pattern=>pattern.regex.test(word));
-	if(!pattern) return null;
+/**
+ * Derives spelling variations of `word` from the first entry of `patterns`
+ * whose regex matches it.
+ *
+ * @param {string} word - the word to test against the patterns
+ * @returns {?Array<string>} null when no pattern matches; otherwise an array
+ *   (possibly sparse) holding `word` at every position listed in the
+ *   pattern's `originalIndex` and the replaced spelling at every position
+ *   listed in its `replacementIndex`.
+ *
+ * NOTE(review): the positions presumably follow the 0-3 UK / 4-7 US / 8 shared
+ * layout documented in lib/index.js - confirm against the `patterns` table.
+ * NOTE(review): `regex.test` is stateful for regexes carrying the g or y
+ * flag; `patterns` is not visible here, so confirm none of them use it.
+ */
+export default function(word) {
+	var pattern = patterns.find(pattern => pattern.regex.test(word));
+	if (!pattern) return null;
 	var result = [];
-	var replacement = word.replace(pattern.regex,pattern.replacementString);
-	pattern.originalIndex.forEach(index=>result[index] = word);
-	pattern.replacementIndex.forEach(index=>result[index] = replacement);
+	var replacement = word.replace(pattern.regex, pattern.replacementString);
+	pattern.originalIndex.forEach(index => result[index] = word);
+	pattern.replacementIndex.forEach(index => result[index] = replacement);
 	return result;
-};
+  }
diff --git a/scripts/spelling-variations/lib/index.js b/scripts/spelling-variations/lib/index.js
@@ -1,106 +1,161 @@
-const bydictionary = require('./bydictionary.json');
-const bypattern = require('./bypattern.js');
-
-const spellingVariations = function (word) {
-	this.data = analyse(word);
-};
-
-// @return {Number} how common this variation in the UK's texts (1-0)
-spellingVariations.prototype.scoreUK = function() {return this.data.scoreUK;};
-// @return {Number} how common this variation in the US's texts (1-0)
-spellingVariations.prototype.scoreUS = function() {return this.data.scoreUS;};
-// @return {Boolean} the word has variations
-spellingVariations.prototype.hasVariations = function() {return this.data.hasVariations;};
-// @return {Array} US variations of the word
-spellingVariations.prototype.USVariations = function() {return this.data.USVariations;};
-// @return {Array} UK variations of the word
-spellingVariations.prototype.UKVariations = function() {return this.data.UKVariations;};
-// @return {String} UK's preferred variation
-spellingVariations.prototype.UKPrefered = function() {return this.data.UKPrefered;};
-// @return {String} US's preferred variation
-spellingVariations.prototype.USPrefered = function() {return this.data.USPrefered;};
-// @return {Array} All of the word's variations
-spellingVariations.prototype.variations = function() {return this.data.variations;};
-// @return {String} UK and US common variation
-spellingVariations.prototype.commonVariation = function() {return this.data.commonVariation;};
-// @return {String} converts the word spelling to it's UK variant
-spellingVariations.prototype.toUK = function() {return this.data.UKPrefered || this.data.word;};
-// @return {String} converts the word spelling to it's US variant
-spellingVariations.prototype.toUS = function() {return this.data.USPrefered || this.data.word;};
-// @return {Object} all the info above
-spellingVariations.prototype.analyse = function() {return this.data;};
-// a us alias for the above function :)
-spellingVariations.prototype.analyze = function() {return this.data;};
-
-
-/**
- * 
- * This little guy here is actually the one who does all the heavy
- * lifting of finding the variations and the class and such..
- * 
-**/
-function analyse(word) {
-
-	word = (word || "").toLowerCase();
-
-	const result = {
-		word,
-		scoreUK:-1,
-		scoreUS:-1,
-		hasVariations:false,
-		UKPrefered:word,
-		USPrefered:word,
-		commonVariation:word,
-		UKVariations:[],
-		USVariations:[],
-		variations:[],
-		analyse:analyse,
-		analyze:analyse
-	};
-
-	var resultArr = [];
-	var dictionaryEntry = bydictionary[word];
-	var patternEntry = bypattern(word);
-	if(dictionaryEntry) resultArr = dictionaryEntry.split("|");
-	else if(patternEntry) resultArr = patternEntry;
-	else return result;
-
-	// resultArr reference:
-	// 0: UK1		4: US1
-	// 1: UK2		5: US2
-	// 2: UK3		6: US3
-	// 3: UK4		7: US4		8:UKUS
-
-
-	result.hasVariations = true;
-	result.variations = filterOut(resultArr,word);
-	result.UKPrefered = resultArr[0];
-	result.USPrefered = resultArr[4];
-	result.commonVariation = resultArr[8] || "";
-	result.UKVariations = resultArr.filter((e,i)=>e&&(i<4||i===8)&&e!==word);
-	result.USVariations = resultArr.filter((e,i)=>e&&(i>3||i===8)&&e!==word);
-
-	if(resultArr.indexOf(word) === 8) {
-		result.scoreUK = 0.87;
-		result.scoreUS = 0.87;
-	}
-
-	else {
-		var UKi = resultArr.slice(0,4).indexOf(word);
-		var USi = resultArr.slice(4,8).indexOf(word);
-
-		if(UKi === -1) result.scoreUK = 0;
-		else result.scoreUK = (4-UKi)*0.25;
-
-		if(USi === -1) result.scoreUS = 0;
-		else result.scoreUS = (4-USi)*0.25;
-	}
-
-	return result;
+import bypattern from './bypattern.js';
+import fs from 'fs';
+
+import path from 'path';
+import { exit } from 'process';
+import { fileURLToPath } from 'url';
+
+// ES modules do not provide __filename / __dirname, so rebuild both from
+// this module's own URL.
+const __filename = fileURLToPath(import.meta.url);
+
+const __dirname = path.dirname(__filename);
+
+/**
+ * Synchronously loads and parses a JSON file.
+ *
+ * @param {string} filePath - path of the JSON file to load
+ * @returns {*} the parsed JSON value
+ *
+ * Any read or parse failure is logged and then terminates the process
+ * through exit(-1).
+ */
+function readJsonFile(filePath) {
+  let parsed;
+  try {
+    parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
+  } catch (err) {
+    console.error(`Error reading JSON file: ${err}`);
+    exit(-1);
+  }
+  return parsed;
 }
 
-function filterOut(arr,word){
-	return arr.filter((x)=>x&&x!==word);
+// Dictionary of known variations, loaded once from the JSON file that sits
+// next to this module.
+const dictionaryPath = path.join(__dirname, 'bydictionary.json');
+const bydictionary = readJsonFile(dictionaryPath);
+
+/**
+ * Analyses one word for UK/US spelling variations, using the dictionary
+ * first and the pattern rules as a fallback. The analysis runs once in the
+ * constructor; the accessor methods below read the cached result.
+ */
+class SpellingVariations {
+  /**
+   * @param {string} word - the word to analyse (lower-cased before lookup)
+   */
+  constructor(word) {
+    this.data = SpellingVariations.analyseWord(word);
+  }
+
+  // @return {Number} how common this variation is in the UK's texts (1-0)
+  scoreUK() {
+    return this.data.scoreUK;
+  }
+
+  // @return {Number} how common this variation is in the US's texts (1-0)
+  scoreUS() {
+    return this.data.scoreUS;
+  }
+
+  // @return {Boolean} the word has variations
+  hasVariations() {
+    return this.data.hasVariations;
+  }
+
+  // @return {Array} US variations of the word
+  USVariations() {
+    return this.data.USVariations;
+  }
+
+  // @return {Array} UK variations of the word
+  UKVariations() {
+    return this.data.UKVariations;
+  }
+
+  // @return {String} UK's preferred variation
+  UKPreferred() {
+    return this.data.UKPreferred;
+  }
+
+  // @return {String} US's preferred variation
+  USPreferred() {
+    return this.data.USPreferred;
+  }
+
+  // @return {Array} All of the word's variations
+  variations() {
+    return this.data.variations;
+  }
+
+  // @return {String} UK and US common variation
+  commonVariation() {
+    return this.data.commonVariation;
+  }
+
+  // @return {String} converts the word spelling to its UK variant
+  toUK() {
+    return this.data.UKPreferred || this.data.word;
+  }
+
+  // @return {String} converts the word spelling to its US variant
+  toUS() {
+    return this.data.USPreferred || this.data.word;
+  }
+
+  /**
+   * Without an argument, returns the cached analysis of the constructor's
+   * word. With an argument, returns a fresh analysis of that word and leaves
+   * the cached data untouched.
+   *
+   * The class used to declare `analyse` twice; the later definition silently
+   * replaced this accessor, so `analyse()` re-analysed the empty string.
+   *
+   * @param {string} [word] - optional word to analyse instead
+   * @return {Object} all the info above
+   */
+  analyse(word) {
+    return word === undefined ? this.data : SpellingVariations.analyseWord(word);
+  }
+
+  // a US alias for reading the cached analysis :)
+  analyze() {
+    return this.data;
+  }
+
+  /**
+   * Does the heavy lifting: finds the variations of `word` and scores it.
+   * Static and free of `this`, so the copies stored on the result object
+   * (`result.analyse` / `result.analyze`) remain callable on their own.
+   *
+   * @param {string} word - the word to analyse; falsy values become ""
+   * @return {Object} the analysis record
+   */
+  static analyseWord(word) {
+    word = (word || "").toLowerCase();
+
+    const result = {
+      word,
+      scoreUK: -1,
+      scoreUS: -1,
+      hasVariations: false,
+      UKPreferred: word,
+      USPreferred: word,
+      commonVariation: word,
+      UKVariations: [],
+      USVariations: [],
+      variations: [],
+      analyse: SpellingVariations.analyseWord,
+      analyze: SpellingVariations.analyseWord
+    };
+
+    // Own-property check only: a word such as "constructor" must not pick up
+    // a member inherited from Object.prototype.
+    const dictionaryEntry = Object.prototype.hasOwnProperty.call(bydictionary, word)
+      ? bydictionary[word]
+      : undefined;
+    const patternEntry = bypattern(word);
+
+    let resultArr;
+    if (dictionaryEntry) resultArr = dictionaryEntry.split("|");
+    else if (patternEntry) resultArr = patternEntry;
+    else return result;
+
+    // resultArr reference:
+    // 0: UK1		4: US1
+    // 1: UK2		5: US2
+    // 2: UK3		6: US3
+    // 3: UK4		7: US4		8:UKUS
+
+    result.hasVariations = true;
+    result.variations = resultArr.filter((entry) => entry && entry !== word);
+    result.UKPreferred = resultArr[0];
+    result.USPreferred = resultArr[4];
+    result.commonVariation = resultArr[8] || "";
+    result.UKVariations = resultArr.filter((e, i) => e && (i < 4 || i === 8) && e !== word);
+    result.USVariations = resultArr.filter((e, i) => e && (i > 3 || i === 8) && e !== word);
+
+    if (resultArr.indexOf(word) === 8) {
+      // The word is the shared UK/US spelling: both sides get the same score.
+      result.scoreUK = 0.87;
+      result.scoreUS = 0.87;
+    } else {
+      // Score by rank within each group of four: 1, 0.75, 0.5, 0.25, or 0
+      // when the word is absent from that group.
+      const UKi = resultArr.slice(0, 4).indexOf(word);
+      const USi = resultArr.slice(4, 8).indexOf(word);
+
+      result.scoreUK = UKi === -1 ? 0 : (4 - UKi) * 0.25;
+      result.scoreUS = USi === -1 ? 0 : (4 - USi) * 0.25;
+    }
+
+    return result;
+  }
+
+  /**
+   * @param {Array} arr - candidate spellings (may contain empty entries)
+   * @param {string} word - the spelling to exclude
+   * @return {Array} the non-empty entries of arr that differ from word
+   */
+  filterOut(arr, word) {
+    return arr.filter((x) => x && x !== word);
+  }
 }
 
-module.exports = spellingVariations;
+export default SpellingVariations;