stable-diffusion-webui-loca.../test/segment-check.ts

44 lines
1.1 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import { async as FastGlob } from '@bluelovers/fast-glob/bluebird';
import { join } from 'upath2';
import { __ROOT } from './__root';
import { outputJSON, readFile } from 'fs-extra';
import { initIdeaSegmentText } from '../src/lib/segment';
import { debug_token } from 'novel-segment/lib/util/index';
import { printPrettyDiff } from '@novel-segment/pretty-diff';
import { chalkByConsole, console } from 'debug-color2';
// Directory scanned for input texts: <__ROOT>/test/temp.
const cwd = join(__ROOT, 'test', 'temp');

/**
 * Manual inspection helper: shows what the word-segmentation system produced
 * for each sample text, to help work out why an expected conversion did not
 * happen.
 *
 * Every `*.txt` file found directly in `cwd` is handled one after another
 * (`mapSeries`). For each file this:
 *  1. logs the file name,
 *  2. reads the file and converts its contents to a string,
 *  3. obtains a segmenter from `initIdeaSegmentText()` and segments the text,
 *  4. hands the token list to `debug_token`,
 *  5. prints the tokens' `w` fields joined together, followed by a pretty
 *     diff of the input against the tokens,
 *  6. writes the token list to `<file>.json` in `cwd`, indented by 2 spaces.
 *
 * The per-file callback resolves with that file's token list.
 */
export default FastGlob<string>(['*.txt'], { cwd })
	.mapSeries(async (file) =>
	{
		console.info(file);

		const rawBuffer = await readFile(join(cwd, file));
		const input = rawBuffer.toString();

		const segmenter = await initIdeaSegmentText();
		const tokens = await segmenter.doSegment(input);

		// Awaited so that a thenable returned here is still waited for, as with `.tap`.
		await debug_token(tokens);

		console.gray("==================");
		console.log(tokens.map(token => token.w).join(''));
		printPrettyDiff(input, tokens);
		console.gray("==================");

		// Persist the raw token list next to the source text for later review.
		await outputJSON(join(cwd, `${file}.json`), tokens, { spaces: 2 });

		return tokens;
	})
;