-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate.py
68 lines (63 loc) · 1.93 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json
# Line markers understood in the transcript file.
USER_PREFIX = "[-你-]:"
AI_PREFIX = "[-AI-]:"
END_MARKER = "[-剧终-]"
COMMENT_PREFIX = "[-注释-]:"


def parse_histories(lines) -> list[list[list[str]]]:
    """Group transcript lines into conversations of ``[user, ai]`` exchanges.

    Each line is right-stripped and then classified:

    * ``[-你-]:`` starts a user message,
    * ``[-AI-]:`` starts an AI message,
    * ``[-剧终-]`` (the whole line) closes the current conversation,
    * ``[-注释-]:`` lines are comments and are skipped,
    * anything else is appended to the current message after a newline.

    Args:
        lines: Iterable of text lines (an open text file works), with or
            without trailing newlines.

    Returns:
        One list per end marker, in file order. Each holds the completed
        ``[user_text, ai_text]`` pairs of that conversation and may be empty.
        Exchanges whose user text is empty are dropped, and lines after the
        last end marker are not returned.
    """
    histories = []
    history = []
    dialog = ["", ""]
    content = ""
    for raw in lines:
        line = raw.rstrip()
        if line.startswith(USER_PREFIX):
            # The AI has finished speaking: its text completes the exchange.
            dialog[1] = content
            if dialog[0]:
                history.append(dialog)
            dialog = ["", ""]
            # Start collecting the user's message.
            content = line.removeprefix(USER_PREFIX)
        elif line.startswith(AI_PREFIX):
            # The user has finished speaking: store their text.
            dialog[0] = content
            # Start collecting the AI's message.
            content = line.removeprefix(AI_PREFIX)
        elif line == END_MARKER:
            # The AI must have been the last speaker: close the exchange.
            dialog[1] = content
            if dialog[0]:
                history.append(dialog)
            dialog = ["", ""]
            histories.append(history)
            history = []
            # Forget the finished message so it cannot leak into the next
            # conversation (e.g. when that one starts with an AI line).
            content = ""
        elif line.startswith(COMMENT_PREFIX):
            continue
        else:
            # Continuation line of a multi-line message.
            content = content + "\n" + line
    return histories


def build_records(histories: list[list[list[str]]]) -> list[dict]:
    """Turn parsed conversations into one record per conversation.

    The last exchange of a conversation supplies ``instruction`` and
    ``output``; all earlier exchanges are copied into ``history``. ``input``
    and ``system`` are always empty strings.

    Args:
        histories: Conversations as returned by :func:`parse_histories`.

    Returns:
        A list of dicts with the keys ``instruction``, ``input``, ``output``,
        ``system`` and ``history``. Conversations without any exchange
        produce no record.
    """
    records = []
    for history in histories:
        if not history:
            # Nothing usable was said; do not emit an empty ``{}`` record.
            continue
        *earlier, (instruction, output) = history
        records.append(
            {
                "instruction": instruction,
                "input": "",
                "output": output,
                "system": "",
                "history": [list(exchange) for exchange in earlier],
            }
        )
    return records


def main(source: str = "origin.txt", target: str = "converted.json") -> None:
    """Convert the transcript at *source* into a JSON file at *target*.

    Args:
        source: Path of the UTF-8 transcript to read.
        target: Path of the UTF-8 JSON file to write (overwritten).
    """
    with open(source, "r", encoding="utf-8") as f:
        histories = parse_histories(f)
    with open(target, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII dialogue readable in the output
        # instead of \uXXXX escapes; the parsed JSON value is the same.
        f.write(json.dumps(build_records(histories), ensure_ascii=False))


if __name__ == "__main__":
    main()