-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
67dfedd
commit 0680529
Showing
12 changed files
with
17,013 additions
and
11,176 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
**/node_modules/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import SpellingVariations from "./lib/index.js"; | ||
import mysql from "mysql2"; | ||
|
||
// Maintenance script: reads every word from the `vocabulary` table, looks up
// its spelling variations and stores them (comma-separated) in the `variant`
// column of the same row.

// Create the database connection.
// NOTE(review): host and credentials are hard-coded — consider moving them to
// environment variables before running this anywhere but a local machine.
const connection = mysql.createConnection({
  host: "127.0.0.1",
  user: "root",
  password: "root",
  database: "netem",
});

// Connect to the database; a failure is only reported here.
connection.connect((err) => {
  if (err) {
    console.error("无法连接到数据库:", err);
    return;
  }
  console.log("已成功连接到数据库");
});

// Fetch every word, then issue one UPDATE per word that has variations.
connection.query("SELECT word FROM vocabulary", (err, results) => {
  if (err) {
    console.error("查询数据库时出错:", err);
    // Release the socket on the failure path as well; otherwise the open
    // connection keeps the process from exiting.
    connection.destroy();
    return;
  }

  // Each row is expected to expose the selected `word` column.
  for (const record of results) {
    const word = record.word;
    const result = new SpellingVariations(word).analyze();
    if (!result.hasVariations) {
      continue;
    }

    // De-duplicate the variants and drop the word itself.
    const uniqueVariants = [
      ...new Set(result.variations.filter((variant) => variant !== word)),
    ].join(", ");

    const updateQuery = `UPDATE \`vocabulary\` SET \`variant\` = ? WHERE \`word\` = ?`;
    const query = connection.query(
      updateQuery,
      [uniqueVariants, word],
      (updateErr) => {
        if (updateErr) {
          console.error(`更新单词 ${word} 的变体时出错: ${updateErr}`);
        }
      }
    );
    console.log('sql是',query.sql)
  }

  // Close the connection once the UPDATE statements have been issued.
  connection.end((endErr) => {
    if (endErr) {
      console.error("关闭数据库连接时出错:", endErr);
    } else {
      console.log("已成功关闭数据库连接");
    }
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,106 +1,161 @@ | ||
const bydictionary = require('./bydictionary.json'); | ||
const bypattern = require('./bypattern.js'); | ||
|
||
// Constructor: analyses `word` once and keeps the result on `this.data`.
const spellingVariations = function (word) {
    this.data = analyse(word);
};

// Plain accessors: each method returns the same-named field of `this.data`.
//   scoreUK / scoreUS            {Number}  how common this variation is in UK / US texts (1-0)
//   hasVariations                {Boolean} the word has variations
//   USVariations / UKVariations  {Array}   US / UK variations of the word
//   UKPrefered / USPrefered      {String}  UK's / US's preferred variation
//   variations                   {Array}   all of the word's variations
//   commonVariation              {String}  UK and US common variation
[
    'scoreUK',
    'scoreUS',
    'hasVariations',
    'USVariations',
    'UKVariations',
    'UKPrefered',
    'USPrefered',
    'variations',
    'commonVariation'
].forEach(function (field) {
    spellingVariations.prototype[field] = function () {
        return this.data[field];
    };
});

// @return {String} the word's UK spelling; falls back to the word itself
spellingVariations.prototype.toUK = function () {
    const preferred = this.data.UKPrefered;
    return preferred || this.data.word;
};

// @return {String} the word's US spelling; falls back to the word itself
spellingVariations.prototype.toUS = function () {
    const preferred = this.data.USPrefered;
    return preferred || this.data.word;
};

// @return {Object} the whole analysis object
spellingVariations.prototype.analyse = function () {
    return this.data;
};

// US-spelling alias of `analyse`
spellingVariations.prototype.analyze = function () {
    return this.data;
};
|
||
|
||
/** | ||
* | ||
* This little guy here is actually the one who does all the heavy | ||
* lifting of finding the variations and the class and such.. | ||
* | ||
**/ | ||
function analyse(word) { | ||
|
||
word = (word || "").toLowerCase(); | ||
|
||
const result = { | ||
word, | ||
scoreUK:-1, | ||
scoreUS:-1, | ||
hasVariations:false, | ||
UKPrefered:word, | ||
USPrefered:word, | ||
commonVariation:word, | ||
UKVariations:[], | ||
USVariations:[], | ||
variations:[], | ||
analyse:analyse, | ||
analyze:analyse | ||
}; | ||
|
||
var resultArr = []; | ||
var dictionaryEntry = bydictionary[word]; | ||
var patternEntry = bypattern(word); | ||
if(dictionaryEntry) resultArr = dictionaryEntry.split("|"); | ||
else if(patternEntry) resultArr = patternEntry; | ||
else return result; | ||
|
||
// resultArr reference: | ||
// 0: UK1 4: US1 | ||
// 1: UK2 5: US2 | ||
// 2: UK3 6: US3 | ||
// 3: UK4 7: US4 8:UKUS | ||
|
||
|
||
result.hasVariations = true; | ||
result.variations = filterOut(resultArr,word); | ||
result.UKPrefered = resultArr[0]; | ||
result.USPrefered = resultArr[4]; | ||
result.commonVariation = resultArr[8] || ""; | ||
result.UKVariations = resultArr.filter((e,i)=>e&&(i<4||i===8)&&e!==word); | ||
result.USVariations = resultArr.filter((e,i)=>e&&(i>3||i===8)&&e!==word); | ||
|
||
if(resultArr.indexOf(word) === 8) { | ||
result.scoreUK = 0.87; | ||
result.scoreUS = 0.87; | ||
} | ||
|
||
else { | ||
var UKi = resultArr.slice(0,4).indexOf(word); | ||
var USi = resultArr.slice(4,8).indexOf(word); | ||
|
||
if(UKi === -1) result.scoreUK = 0; | ||
else result.scoreUK = (4-UKi)*0.25; | ||
|
||
if(USi === -1) result.scoreUS = 0; | ||
else result.scoreUS = (4-USi)*0.25; | ||
} | ||
|
||
return result; | ||
import bypattern from './bypattern.js'; | ||
import fs from 'fs'; | ||
|
||
import path from 'path'; | ||
import { exit } from 'process'; | ||
import { fileURLToPath } from 'url'; | ||
|
||
const __filename = fileURLToPath(import.meta.url); | ||
|
||
const __dirname = path.dirname(__filename); | ||
|
||
/**
 * Synchronously reads a UTF-8 file and parses its content as JSON.
 *
 * Failures are reported by throwing instead of terminating the process, so
 * an application importing this module can decide how to react.
 *
 * @param {string} filePath - path of the JSON file to load
 * @returns {*} the parsed JSON value
 * @throws {Error} when the file cannot be read or is not valid JSON; the
 *   underlying error is available as `cause`
 */
function readJsonFile(filePath) {
  try {
    const data = fs.readFileSync(filePath, 'utf8');
    return JSON.parse(data);
  } catch (error) {
    throw new Error(`Error reading JSON file ${filePath}: ${error.message}`, {
      cause: error,
    });
  }
}
|
||
function filterOut(arr,word){ | ||
return arr.filter((x)=>x&&x!==word); | ||
const bydictionary = readJsonFile(path.join(__dirname,'bydictionary.json')); | ||
|
||
/**
 * Spelling-variation lookup for a single word.
 *
 * The word is analysed once in the constructor; the accessor methods expose
 * the fields of the resulting analysis object (`this.data`).
 */
class SpellingVariations {
  /**
   * @param {string} word - the word to analyse (lower-cased before lookup)
   */
  constructor(word) {
    this.data = analyseWord(word);
  }

  // @return {Number} how common this variation is in the UK's texts (1-0)
  scoreUK() {
    return this.data.scoreUK;
  }

  // @return {Number} how common this variation is in the US's texts (1-0)
  scoreUS() {
    return this.data.scoreUS;
  }

  // @return {Boolean} the word has variations
  hasVariations() {
    return this.data.hasVariations;
  }

  // @return {Array} US variations of the word
  USVariations() {
    return this.data.USVariations;
  }

  // @return {Array} UK variations of the word
  UKVariations() {
    return this.data.UKVariations;
  }

  // @return {String} UK's preferred variation
  UKPreferred() {
    return this.data.UKPreferred;
  }

  // @return {String} US's preferred variation
  USPreferred() {
    return this.data.USPreferred;
  }

  // @return {Array} All of the word's variations
  variations() {
    return this.data.variations;
  }

  // @return {String} UK and US common variation
  commonVariation() {
    return this.data.commonVariation;
  }

  // @return {String} converts the word spelling to its UK variant
  toUK() {
    return this.data.UKPreferred || this.data.word;
  }

  // @return {String} converts the word spelling to its US variant
  toUS() {
    return this.data.USPreferred || this.data.word;
  }

  /**
   * Without an argument: returns the analysis computed by the constructor.
   * With a word: returns a fresh analysis of that word.
   *
   * A class can hold only one method per name, so the former no-argument
   * accessor and the one-argument worker (both called `analyse`) are merged
   * here; previously the worker silently replaced the accessor.
   *
   * @param {string} [word] - optional word to analyse
   * @returns {Object} the analysis object
   */
  analyse(word) {
    return word === undefined ? this.data : analyseWord(word);
  }

  // a US alias for the above function :)
  analyze(word) {
    return this.analyse(word);
  }

  // @return {Array} the non-empty items of `arr` that differ from `word`
  filterOut(arr, word) {
    return arr.filter((x) => x && x !== word);
  }
}

/**
 * This little guy here is actually the one who does all the heavy lifting
 * of finding the variations and the scores.
 *
 * It is a standalone function (not a method) so the `analyse` / `analyze`
 * references stored on the returned object work without any `this`.
 *
 * @param {string} word - the word to analyse; falsy values are treated as ""
 * @returns {Object} analysis object; `hasVariations` is false and both
 *   scores are -1 when neither the dictionary nor the patterns know the word
 */
function analyseWord(word) {
  word = String(word || "").toLowerCase();

  const result = {
    word,
    scoreUK: -1,
    scoreUS: -1,
    hasVariations: false,
    UKPreferred: word,
    USPreferred: word,
    commonVariation: word,
    UKVariations: [],
    USVariations: [],
    variations: [],
    analyse: analyseWord,
    analyze: analyseWord
  };

  // Only non-empty string entries count: inherited keys such as
  // "constructor" or "__proto__" resolve to non-string values that have no
  // .split() and must not be mistaken for dictionary data.
  const dictionaryEntry = bydictionary[word];
  let resultArr;
  if (typeof dictionaryEntry === "string" && dictionaryEntry !== "") {
    resultArr = dictionaryEntry.split("|");
  } else {
    // The pattern lookup is consulted only when the dictionary has no entry.
    const patternEntry = bypattern(word);
    if (!patternEntry) return result;
    resultArr = patternEntry;
  }

  // resultArr reference:
  // 0: UK1     4: US1
  // 1: UK2     5: US2
  // 2: UK3     6: US3
  // 3: UK4     7: US4     8:UKUS

  result.hasVariations = true;
  result.variations = resultArr.filter((x) => x && x !== word);
  result.UKPreferred = resultArr[0];
  result.USPreferred = resultArr[4];
  result.commonVariation = resultArr[8] || "";
  result.UKVariations = resultArr.filter((e, i) => e && (i < 4 || i === 8) && e !== word);
  result.USVariations = resultArr.filter((e, i) => e && (i > 3 || i === 8) && e !== word);

  if (resultArr.indexOf(word) === 8) {
    // The word is the shared UK/US form.
    result.scoreUK = 0.87;
    result.scoreUS = 0.87;
  } else {
    // Rank within each group of four: first slot scores 1, then 0.75, 0.5,
    // 0.25; a word absent from the group scores 0.
    const UKi = resultArr.slice(0, 4).indexOf(word);
    const USi = resultArr.slice(4, 8).indexOf(word);

    result.scoreUK = UKi === -1 ? 0 : (4 - UKi) * 0.25;
    result.scoreUS = USi === -1 ? 0 : (4 - USi) * 0.25;
  }

  return result;
}
|
||
module.exports = spellingVariations; | ||
export default SpellingVariations; |
Oops, something went wrong.