diff --git a/src/atoms/data.ts b/src/atoms/data.ts index bd85958..3ee88c1 100644 --- a/src/atoms/data.ts +++ b/src/atoms/data.ts @@ -1,6 +1,13 @@ import { atom, useAtomValue } from "jotai"; import { primitiveRepertoireAtom } from "./constants"; -import { CustomReadings, isPUA } from "~/lib"; +import { + CharacterSetSpecifier, + CustomReadings, + PrimitiveCharacter, + isPUA, + isValidCJKBasicChar, + isValidCJKChar, +} from "~/lib"; import { recursiveRenderCompound } from "~/lib"; import { dataAtom } from "."; import { focusAtom } from "jotai-optics"; @@ -9,6 +16,9 @@ import { CustomGlyph } from "~/lib"; import { determine } from "~/lib"; import { classifier } from "~/lib"; +export const characterSetAtom = focusAtom(dataAtom, (o) => + o.prop("character_set").valueOr("general" as CharacterSetSpecifier), +); export const userRepertoireAtom = focusAtom(dataAtom, (o) => o.prop("repertoire").valueOr({} as PrimitiveRepertoire), ); @@ -25,6 +35,25 @@ export const userTagsAtom = focusAtom(dataAtom, (o) => o.prop("tags").valueOr([] as string[]), ); +export const charactersAtom = atom((get) => { + const primitiveRepertoire = get(primitiveRepertoireAtom); + const characterSet = get(characterSetAtom); + const filters: Record< + CharacterSetSpecifier, + (k: string, v: PrimitiveCharacter) => boolean + > = { + general: (_, v) => v.tygf > 0, + basic: (k, v) => v.tygf > 0 || isValidCJKBasicChar(k), + extended: (k, v) => v.tygf > 0 || isValidCJKChar(k), + }; + + const filter = filters[characterSet]; + const characters = Object.entries(primitiveRepertoire) + .filter(([k, v]) => filter(k, v)) + .map(([k]) => k); + return characters; +}); + export const allRepertoireAtom = atom((get) => { const repertoire = get(primitiveRepertoireAtom); const userRepertoire = get(userRepertoireAtom); @@ -53,10 +82,11 @@ export const glyphAtom = atom((get) => { const result = new Map(); for (const [char, { glyph }] of Object.entries(repertoire)) { if (glyph === undefined) continue; + if (result.has(char)) 
continue; if (glyph.type === "basic_component") { result.set(char, glyph.strokes); } else { - const svgglyph = recursiveRenderCompound(glyph, repertoire); + const svgglyph = recursiveRenderCompound(glyph, repertoire, result); if (svgglyph instanceof Error) continue; result.set(char, svgglyph); } diff --git a/src/components/Debugger.tsx b/src/components/Debugger.tsx index 2da6c35..37ffe10 100644 --- a/src/components/Debugger.tsx +++ b/src/components/Debugger.tsx @@ -3,7 +3,13 @@ import { ColumnsType } from "antd/es/table"; import { useAtom, useAtomValue } from "jotai"; import { atomWithStorage } from "jotai/utils"; import { useMemo, useState } from "react"; -import { DictEntry, EncodeResult, configAtom, repertoireAtom } from "~/atoms"; +import { + DictEntry, + EncodeResult, + charactersAtom, + configAtom, + repertoireAtom, +} from "~/atoms"; import { encodeResultAtom } from "~/atoms/cache"; import { Select, Uploader } from "~/components/Utils"; import { getSupplemental } from "~/lib"; @@ -11,9 +17,7 @@ import { getSupplemental } from "~/lib"; export default function Debugger() { const config = useAtomValue(configAtom); const repertoire = useAtomValue(repertoireAtom); - const characters = Object.entries(repertoire) - .filter(([, v]) => v.tygf > 0) - .map(([x]) => x); + const characters = useAtomValue(charactersAtom); const code = useAtomValue(encodeResultAtom) ?? 
[]; const referenceAtom = useMemo( () => diff --git a/src/components/Optimizer.tsx b/src/components/Optimizer.tsx index 4d71571..e91c853 100644 --- a/src/components/Optimizer.tsx +++ b/src/components/Optimizer.tsx @@ -17,6 +17,7 @@ import { dictionaryAtom, DictEntry, makeEncodeCallback, + charactersAtom, } from "~/atoms"; import { assemble, @@ -86,9 +87,7 @@ export default function Optimizer() { const [analysisResult, setAnalysisResult] = useAtom(analysisResultAtom); const [assemblyResult, setAssemblyResult] = useAtom(assemblyResultAtom); const repertoire = useAtomValue(repertoireAtom); - const characters = Object.entries(repertoire) - .filter(([_, v]) => v.tygf > 0) - .map(([x]) => x); + const characters = useAtomValue(charactersAtom); const customElements = useAtomValue(customElementsAtom); const [out1, setOut1] = useState(""); const [result, setResult] = useState<[Date, string][]>([]); diff --git a/src/components/PrimitiveDuplicationAnalyzer.tsx b/src/components/PrimitiveDuplicationAnalyzer.tsx deleted file mode 100644 index 1fb38cc..0000000 --- a/src/components/PrimitiveDuplicationAnalyzer.tsx +++ /dev/null @@ -1,92 +0,0 @@ -import { - ProForm, - ProFormDependency, - ProFormDigit, - ProFormGroup, - ProFormSelect, - ProFormSwitch, -} from "@ant-design/pro-components"; -import { Form, Space, Typography } from "antd"; -import { useAtomValue } from "jotai"; -import { Frequency, maxLengthAtom } from "~/atoms"; -import { - AnalyzerForm, - AssemblyResult, - defaultAnalyzer, - summarize, -} from "~/lib"; -import { Select } from "./Utils"; -import { Combined } from "./SequenceTable"; - -export default function PrimitiveDuplicationAnalyzer({ - selections, - setAnalyzer, -}: { - selections: number; - setAnalyzer: (d: AnalyzerForm) => void; -}) { - const maxLength = useAtomValue(maxLengthAtom); - const [form] = Form.useForm(); - - return ( - <> - 原始重码分析 - - form={form} - layout="horizontal" - submitter={false} - initialValues={defaultAnalyzer} - onValuesChange={(_, values) 
=> setAnalyzer(values)} - > - - {selections} - - - - - ({ - label: d === 0 ? "全部" : `前 ${d} 码`, - value: d, - }))} - allowClear={false} - /> - - {({ top }) => ( - - - ({ + label: specifierNames[x], + value: x, + }))} + value={characterSet} + onChange={setCharacterSet} + /> + + + 字集是系统所处理的字符集合,您可以选择通用、基本或扩展三者之一。字集越大,您的方案能输入的字符就越多,但是在拆分时要考虑的字形种类也就更多。建议您从通用字集开始,根据实际需要逐步扩展。 + +
    +
  • 通用字集(8105 个字符)即《通用规范汉字表》中的所有字符;
  • 基本字集(21265 个字符)是在通用字集的基础上增加了所有中日韩统一表意文字基本区的字符;
  • 扩展字集(27780 个字符)是在基本字集的基础上增加了所有中日韩统一表意文字扩展区 A 的字符;
资料 diff --git a/src/pages/[id]/statistics.tsx b/src/pages/[id]/statistics.tsx new file mode 100644 index 0000000..3501b5e --- /dev/null +++ b/src/pages/[id]/statistics.tsx @@ -0,0 +1,202 @@ +import { Flex, Table } from "antd"; +import { + alphabetAtom, + displayAtom, + frequencyAtom, + useChaifenTitle, +} from "~/atoms"; +import { + ProForm, + ProFormDependency, + ProFormDigit, + ProFormGroup, + ProFormSelect, +} from "@ant-design/pro-components"; +import { Form, Space, Typography } from "antd"; +import { useAtomValue } from "jotai"; +import { Frequency, maxLengthAtom } from "~/atoms"; +import { AnalyzerForm, AssemblyResult, renderIndexed } from "~/lib"; +import { Select } from "~/components/Utils"; +import { useState } from "react"; +import { assemblyResultAtom } from "~/atoms/cache"; +import { ColumnsType } from "antd/es/table"; +import { range, sum, sumBy } from "lodash-es"; + +const numbers = [ + "零", + "一", + "二", + "三", + "四", + "五", + "六", + "七", + "八", + "九", + "十", +]; +const render = (value: number) => numbers[value] || value.toString(); + +const analyzePrimitiveDuplication = ( + analyzer: AnalyzerForm, + frequency: Frequency, + result: AssemblyResult, + display: (d: string) => string, + maxLength: number, +) => { + const reverseMap = new Map(); + let relevant = result; + if (analyzer.type === "single") + relevant = relevant.filter((x) => [...x.name].length === 1); + if (analyzer.type === "multi") + relevant = relevant.filter((x) => [...x.name].length > 1); + if (analyzer.top > 0) { + relevant.sort((a, b) => { + return (frequency[b.name] ?? 0) - (frequency[a.name] ?? 0); + }); + relevant = relevant.slice(0, analyzer.top); + } + for (const assembly of relevant) { + const { name, sequence: elements } = assembly; + const sliced = range(maxLength).map((i) => + analyzer.position.includes(i) ? 
elements[i] : "*", + ); + const summary = `(${sliced.map((x) => renderIndexed(x, display)).join(", ")})`; + reverseMap.set(summary, (reverseMap.get(summary) || []).concat(name)); + } + + return reverseMap; +}; + +interface Density { + name: string; + items: string[]; +} + +const SubStatistics = ({ init }: { init: AnalyzerForm }) => { + const maxLength = useAtomValue(maxLengthAtom); + const [analyzer, setAnalyzer] = useState(init); + const [form] = Form.useForm(); + const assemblyResult = useAtomValue(assemblyResultAtom) ?? []; + const frequency = useAtomValue(frequencyAtom); + const display = useAtomValue(displayAtom); + const reverseMap = analyzePrimitiveDuplication( + analyzer, + frequency, + assemblyResult, + display, + maxLength, + ); + const alphabet = useAtomValue(alphabetAtom); + const dataSource = [...reverseMap] + .sort((a, b) => b[1].length - a[1].length) + .map(([name, items]) => ({ name, items })); + const lengths = dataSource.map((x) => x.items.length); + const columns: ColumnsType = [ + { + title: "元素序列", + dataIndex: "name", + key: "name", + width: 192, + }, + { + title: "数量", + dataIndex: "items", + key: "density", + render: (items) => items.length, + width: 64, + }, + { + title: "对象", + dataIndex: "items", + key: "items", + render: (items) => items.join("、"), + }, + ]; + + const order = render(analyzer.position.length); + const coorder = render(maxLength - analyzer.position.length); + const space = Math.pow(alphabet.length, maxLength - analyzer.position.length); + const estimation = sumBy(lengths, (x) => + analyzer.position.length === maxLength ? x - 1 : (x * x) / 2 / space, + ); + return ( + <> + + {order}元分布({coorder}阶重码估计:{Math.round(estimation)}) + + + form={form} + layout="horizontal" + submitter={false} + initialValues={analyzer} + onValuesChange={(_, values) => setAnalyzer(values)} + > + + ({ + label: `第 ${d + 1} 码`, + value: d, + }))} + allowClear={false} + /> + + + {({ top }) => ( + + +