feat: segment

dev
bluelovers 2023-08-08 06:40:17 +08:00
parent b470aca534
commit fedfc96baa
5 changed files with 125 additions and 2 deletions

View File

@ -6,7 +6,7 @@ on:
branches: branches:
- dev - dev
paths-ignore: paths-ignore:
- 'test/**' # - 'test/**'
- '.run/**' - '.run/**'
- '.github/**' - '.github/**'
- '/*.*' - '/*.*'
@ -79,8 +79,9 @@ jobs:
with: with:
github_token: ${{ secrets.GITHUB_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }}
branch: ${{ github.ref }} branch: ${{ github.ref }}
- uses: casperdcl/push-dir@v1 - name: Push output
if: success() if: success()
uses: casperdcl/push-dir@v1
with: with:
message: "build: static localizations" message: "build: static localizations"
branch: main branch: main

0
src/dict/synonym.txt Normal file
View File

0
src/dict/table.txt Normal file
View File

105
src/lib/segment.ts Normal file
View File

@ -0,0 +1,105 @@
import Bluebird from 'bluebird';
import { getSegment, stringify } from 'novel-segment-cli';
import { EnumDictDatabase } from '@novel-segment/types';
import { Segment } from 'novel-segment/lib';
import { cn2tw_min, tw2cn_min } from '@lazy-cjk/zh-convert/min';
import { load as loadSynonym } from '@novel-segment/loaders/segment/synonym';
import { load as loadTable } from '@novel-segment/loaders/segment/index';
import { join } from 'path';
import { __ROOT } from '../../test/__root';
// Path (under __ROOT) of the project's table dictionary file; its entries are
// added to the TABLE dictionary database by initIdeaSegmentText().
const __dict_table_txt = join(__ROOT, 'src', 'dict', 'table.txt');
// Path (under __ROOT) of the project's synonym file; its entries are added to
// the SYNONYM dictionary database by initIdeaSegmentText().
const __dict_synonym_txt = join(__ROOT, 'src', 'dict', 'synonym.txt');
// Module-level cache of the configured segmenter. It is left unassigned until
// initIdeaSegmentText() has finished; processTextSync() reads it directly.
let inited: Segment;
/**
 * Lazily builds the module-wide segmenter and caches it in `inited`.
 *
 * On the first call this obtains a segmenter from `getSegment` (with
 * `nodeNovelMode` enabled), adds every entry of `src/dict/table.txt` to its
 * TABLE dictionary and every entry of `src/dict/synonym.txt` to its SYNONYM
 * dictionary, then stores the instance. Later calls skip all of that and
 * resolve with the stored instance.
 *
 * @returns a Bluebird promise resolving to the cached segmenter
 */
export function initIdeaSegmentText()
{
	return Bluebird.resolve()
		.then(async () =>
		{
			// Already built by an earlier call: hand back the cached instance.
			if (inited)
			{
				return inited;
			}

			const segment = await getSegment({
				//disableCache: true,
				optionsSegment: {
					nodeNovelMode: true,
				},
			});

			const tableDb = segment.getDictDatabase(EnumDictDatabase.TABLE);
			const synonymDb = segment.getDictDatabase(EnumDictDatabase.SYNONYM);

			// autoCjk is switched on only while the project table is loaded;
			// the previous value is put back right after.
			const previousAutoCjk = tableDb.options.autoCjk;
			tableDb.options.autoCjk = true;

			/*
			Reference: adding table entries inline instead of via table.txt

			tableDb
				.add(['選項卡', 0x100000, 0])
				.add(['標籤頁', 0x100000, 0])
				.add(['標簽頁', 0x100000, 0])
			;
			 */

			/*
			Reference: removing table entries

			tableDb
				.remove('复上')
				.remove('复分析')
				.remove('为重')
				.remove('出新')
			;
			 */

			await loadTable(__dict_table_txt)
				.each((entry) => tableDb.add(entry as any))
			;

			tableDb.options.autoCjk = previousAutoCjk;

			// NOTE(review): the meaning of the extra `false, true` arguments is
			// defined by the synonym database's add(); confirm against its API.
			await loadSynonym(__dict_synonym_txt)
				.each((entry) => synonymDb.add(entry as any, false, true))
			;

			/*
			Reference: adding synonym groups inline instead of via synonym.txt

			synonymDb
				.add(['頁籤', '選項卡', '標籤頁', '標簽頁', '选项卡', '标签页', '标签页'])
				.add(['視窗', '窗口', '窗口'])
				.add(['預設', '默認', '默认'])
				.add(['列印', '打印', '打印'])
				.add(['貼上', '粘貼', '粘贴'])
				.add(['剪貼簿', '剪貼板', '剪贴板'])
				.add(['剪下', '剪切', '剪切'])
				.add(['註釋', '注釋', '注释'])
				.add(['唯讀', '只讀', '只读'])
				.add(['選單', '菜單', '菜单'])
				.add(['日誌', '日志'])
				.add(['註解', '注解', '注解'])
				.add(['磁碟', '磁盤', '磁盘'])
			;
			 */

			inited = segment;

			return inited;
		})
	;
}
/**
 * Synchronously post-processes one localization string.
 *
 * Unless `opts.noSeg` is set, the input is first run through the cached
 * segmenter (`inited.doSegment`) and turned back into a string with
 * `stringify`. The result is then passed through `tw2cn_min` when
 * `opts.toCN` is set, otherwise through `cn2tw_min`.
 *
 * @param input text to process
 * @param opts optional switches
 * @param opts.toCN convert with `tw2cn_min` instead of the default `cn2tw_min`
 * @param opts.noSeg skip the segmenter pass and only run the conversion
 * @returns the processed text
 * @throws Error when segmentation is requested before `initIdeaSegmentText()`
 *   has finished building the segmenter
 */
export function processTextSync(input: string, opts?: {
	toCN?: boolean,
	noSeg?: boolean,
})
{
	// Read the switches without reassigning the caller-visible parameter.
	const { toCN, noSeg } = opts ?? {};

	let text = input;

	if (!noSeg)
	{
		// `inited` is only assigned once initIdeaSegmentText() resolves; fail
		// with an actionable message instead of an opaque TypeError.
		if (!inited)
		{
			throw new Error(`processTextSync: segmenter is not initialized, await initIdeaSegmentText() first`);
		}

		text = stringify(inited.doSegment(input));
	}

	text = (toCN ? tw2cn_min : cn2tw_min)(text);

	return text;
}

View File

@ -1,6 +1,7 @@
import { outputJSON, readJSON } from 'fs-extra'; import { outputJSON, readJSON } from 'fs-extra';
import { join } from 'path'; import { join } from 'path';
import { __ROOT } from '../__root'; import { __ROOT } from '../__root';
import { initIdeaSegmentText, processTextSync } from '../../src/lib/segment';
export default Promise.all([ export default Promise.all([
readJSON(join(__ROOT, 'localizations', 'zh_TW.json')), readJSON(join(__ROOT, 'localizations', 'zh_TW.json')),
@ -10,6 +11,7 @@ export default Promise.all([
]) ])
.then(ls => .then(ls =>
{ {
console.log(`merge localizations`);
let tw = ls[2]; let tw = ls[2];
@ -29,10 +31,25 @@ export default Promise.all([
}) })
.then(async (json) => .then(async (json) =>
{ {
console.log(`output zh_Hant.json`);
await outputJSON(join(__ROOT, 'localizations', 'zh_Hant.json'), json, { await outputJSON(join(__ROOT, 'localizations', 'zh_Hant.json'), json, {
spaces: 2, spaces: 2,
}); });
console.log(`init Segment`);
await initIdeaSegmentText();
console.log(`process zh_Hant.json`);
for (const key of Object.keys(json))
{
json[key] = processTextSync(json[key]);
}
console.log(`build zh_Hant.json`);
await outputJSON(join(__ROOT, 'output', 'localizations', 'zh_Hant.json'), json, { await outputJSON(join(__ROOT, 'output', 'localizations', 'zh_Hant.json'), json, {
spaces: 2, spaces: 2,
}); });