Skip to content

Commit

Permalink
refactor: 更换分词器
Browse files Browse the repository at this point in the history
fix: 修复不支持交叉编译错误
  • Loading branch information
Clov614 committed Aug 30, 2024
1 parent 962e669 commit be7b720
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 8 deletions.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ require (
github.com/Clov614/go-ai-sdk v0.3.3
github.com/eatmoreapple/openwechat v1.4.7
github.com/gin-gonic/gin v1.10.0
github.com/go-ego/gse v0.80.3
github.com/google/uuid v1.6.0
github.com/rs/zerolog v1.33.0
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
github.com/yanyiwu/gojieba v1.4.2
github.com/zwgblue/yaml-encoder v0.0.0-20221226083717-a0bdbda0d998
go.etcd.io/bbolt v1.3.11
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -40,6 +40,7 @@ require (
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/vcaesar/cedar v0.20.2 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/net v0.27.0 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-ego/gse v0.80.3 h1:YNFkjMhlhQnUeuoFcUEd1ivh6SOB764rT8GDsEbDiEg=
github.com/go-ego/gse v0.80.3/go.mod h1:Gt3A9Ry1Eso2Kza4MRaiZ7f2DTAvActmETY46Lxg0gU=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
Expand Down Expand Up @@ -81,8 +83,10 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/yanyiwu/gojieba v1.4.2 h1:oEuiE/nEmMPdyBEAHMN7IdALNryTnyHd3VlBU0okyiQ=
github.com/yanyiwu/gojieba v1.4.2/go.mod h1:54wkP7sMJ6bklf7yPl6F+JG71dzVUU1WigZbR47nGdY=
github.com/vcaesar/cedar v0.20.2 h1:TDx7AdZhilKcfE1WvdToTJf5VrC/FXcUOW+KY1upLZ4=
github.com/vcaesar/cedar v0.20.2/go.mod h1:lyuGvALuZZDPNXwpzv/9LyxW+8Y6faN7zauFezNsnik=
github.com/vcaesar/tt v0.20.1 h1:D/jUeeVCNbq3ad8M7hhtB3J9x5RZ6I1n1eZ0BJp7M+4=
github.com/vcaesar/tt v0.20.1/go.mod h1:cH2+AwGAJm19Wa6xvEa+0r+sXDJBT0QgNQey6mwqLeU=
github.com/zwgblue/yaml-encoder v0.0.0-20221226083717-a0bdbda0d998 h1:nfgqxY/ewt2bYcoPiND18j/uKPn4cbiQa9WyD+HIPKM=
github.com/zwgblue/yaml-encoder v0.0.0-20221226083717-a0bdbda0d998/go.mod h1:gDS9Ro20YdMC2SY41VMVcy6PqyVCseFPIX1+symaFww=
go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0=
Expand Down
26 changes: 21 additions & 5 deletions rikkabot/plugins/ai/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@ package ai

import (
"fmt"
"github.com/yanyiwu/gojieba"
"github.com/go-ego/gse"
)

// x is a package-level gojieba segmenter created at package initialization.
// NOTE(review): this span comes from a diff view; this declaration appears to
// be the removed side and the seg block below its replacement — confirm.
var x *gojieba.Jieba = gojieba.NewJieba()
var (
// seg is the package-level gse segmenter; init loads dictionaries into it
// and copies it into DefaultFilter.
seg gse.Segmenter
)

// DefaultFilter is the package-wide Filter instance; it is assigned in init.
var DefaultFilter *Filter

// Filter carries the word segmenter that its methods use to split text
// before looking the pieces up in sensitiveWordsMap.
// NOTE(review): this span comes from a diff view; the gojieba field appears to
// be the removed line and the gse field its replacement — confirm.
type Filter struct {
x *gojieba.Jieba // word segmenter
seg gse.Segmenter // word segmenter
}

func (f *Filter) isLegal(word string) bool {
cutWords := f.x.CutAll(word)
cutWords := f.seg.CutAll(word)
for _, w := range cutWords {
if _, exist := sensitiveWordsMap[w]; exist {
return false
Expand All @@ -42,7 +44,21 @@ func (f *Filter) filter(input string, handle func(content string) (string, error
}

// init loads dictionaries into the package-level segmenter seg and then
// builds DefaultFilter from it.
//
// NOTE(review): every Load* error is discarded with `_ =`, so a dictionary
// that fails to load goes unnoticed here — confirm this best-effort behavior
// is intended.
// NOTE(review): this span comes from a diff view; `x: x,` appears to be the
// removed line and `seg: seg,` its replacement — confirm.
func init() {
// Load the default dictionary.
_ = seg.LoadDict()
// Load the default embedded dictionary.
// seg.LoadDictEmbed()
//
// Load the Simplified Chinese dictionary.
_ = seg.LoadDict("zh_s")
_ = seg.LoadDictEmbed("zh_s")
//
// Load the Traditional Chinese dictionary.
_ = seg.LoadDict("zh_t")
//
// Load the Japanese dictionary.
// seg.LoadDict("jp")
DefaultFilter = &Filter{
x: x,
seg: seg,
}
}
65 changes: 65 additions & 0 deletions rikkabot/plugins/ai/filter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Package ai
// @Author Clover
// @Date 2024/8/30 下午9:39:00
// @Desc
package ai

import (
"github.com/go-ego/gse"
"testing"
)

// TestFilter_filter drives Filter.filter through a small table of inputs.
// Each case builds a Filter around the package-level segmenter, passes the
// input together with a handler that echoes its argument, and compares the
// returned string and error presence with the expected values.
func TestFilter_filter(t *testing.T) {
	// echo hands its content back unchanged and never fails.
	echo := func(content string) (string, error) { return content, nil }

	cases := []struct {
		name    string
		seg     gse.Segmenter
		input   string
		handle  func(content string) (string, error)
		wantRes string
		wantErr bool
	}{
		// Expected to come back as the literal "filtered" rather than the input.
		{name: "test1", seg: seg, input: "共产党", handle: echo, wantRes: "filtered"},
		// Expected to pass through the handler untouched.
		{name: "test2", seg: seg, input: "rikka", handle: echo, wantRes: "rikka"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			filter := &Filter{seg: tc.seg}

			gotRes, err := filter.filter(tc.input, tc.handle)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("filter() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if gotRes != tc.wantRes {
				t.Errorf("filter() gotRes = %v, want %v", gotRes, tc.wantRes)
			}
		})
	}
}

0 comments on commit be7b720

Please sign in to comment.