-
Notifications
You must be signed in to change notification settings - Fork 5
/
fetch_test.go
116 lines (104 loc) · 2.58 KB
/
fetch_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package crawler
import (
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestFetchParse serves a small HTML page from a local httptest server,
// fetches it through DefaultClient, runs the response scanning steps
// (scanLocation, detectContentType, preview, scanHTMLMeta) and then checks
// the metadata recorded on the response: NewURL, ContentType, Charset,
// ContentLocation and the meta-refresh target and delay.
func TestFetchParse(t *testing.T) {
	// The page declares GBK in its <meta> tags and asks for a refresh to
	// the relative URL "1.html" after 30 seconds.
	const page = `
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=GBK">
<meta charset="GBK">
<meta http-equiv="refresh" content="30; URL=1.html">
</head>
<body>
</body>
</html>
`
	// checkErr aborts the test immediately on a setup error; nothing after
	// a failed setup step can produce a meaningful result.
	checkErr := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	// The handler sends a Content-Type without a charset and a relative
	// Content-Location header.
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		w.Header().Set("Content-Location", "index.html")
		fmt.Fprint(w, page)
	}))
	defer ts.Close()

	// Named pageURL rather than url so the net/url package is not shadowed.
	pageURL := ts.URL + "/hello/"
	re, err := http.NewRequest("GET", pageURL, nil)
	checkErr(err)

	req := &Request{
		Request: re,
	}
	resp, err := DefaultClient.Do(req)
	checkErr(err)

	assert := assert.New(t)
	resp.scanLocation()
	resp.detectContentType()
	preview, err := resp.preview(1024)
	assert.NoError(err)
	resp.scanHTMLMeta(preview)

	// NewURL is expected to equal the requested URL.
	assert.Equal(pageURL, resp.NewURL.String())
	// The header carried no charset; the expected values match the GBK
	// declaration in the page's <meta> tags, in lower case.
	assert.Equal(`text/html; charset=gbk`, resp.ContentType)
	assert.Equal("gbk", resp.Charset)
	// Relative references are expected to be resolved against the page URL.
	assert.Equal(pageURL+"1.html", resp.Refresh.URL.String())
	assert.Equal(pageURL+"index.html", resp.ContentLocation.String())
	assert.Equal(30, resp.Refresh.Seconds)
}
// TestConvToUTF8 runs convToUTF8 over a table of responses with different
// declared content types and checks, for each one, the resulting Charset,
// the CertainCharset flag, and the bytes that can subsequently be read from
// Body.
func TestConvToUTF8(t *testing.T) {
	// Each case keeps its input and its expectations together so they
	// cannot drift out of step.
	cases := []struct {
		resp           *Response
		charset        string
		certainCharset bool
		content        []byte
	}{
		{
			// No charset in the content type: utf-8 is expected, but not
			// flagged as certain.
			resp: &Response{
				Body:        strings.NewReader("<html>你好,世界</html>"),
				ContentType: "text/html",
			},
			charset:        "utf-8",
			certainCharset: false,
			content:        []byte("<html>你好,世界</html>"),
		}, {
			// Explicit utf-8 charset: expected to be flagged as certain.
			resp: &Response{
				Body:        strings.NewReader("<html>你好,世界</html>"),
				ContentType: "text/html; charset=utf-8",
			},
			charset:        "utf-8",
			certainCharset: true,
			content:        []byte("<html>你好,世界</html>"),
		}, {
			// Explicit gbk charset with an ASCII-only body: the content is
			// expected to come back unchanged.
			resp: &Response{
				Body:        strings.NewReader("<html><body></body></html>"),
				ContentType: "text/html; charset=gbk",
			},
			charset:        "gbk",
			certainCharset: true,
			content:        []byte("<html><body></body></html>"),
		},
	}

	assert := assert.New(t)
	for i, c := range cases {
		u, err := url.Parse(fmt.Sprintf("/hello/%d", i))
		if err != nil {
			t.Fatalf("parsing URL for case %d: %v", i, err)
		}
		r := c.resp
		r.URL = u
		r.NewURL = u

		preview, err := r.preview(1024)
		assert.NoError(err)
		// The callback always answers "utf-8".
		// NOTE(review): presumably it is a per-URL charset fallback; confirm
		// against convToUTF8.
		r.convToUTF8(preview, func(_ *url.URL) string { return "utf-8" })

		assert.Equal(c.charset, r.Charset)
		assert.Equal(c.certainCharset, r.CertainCharset)

		b, err := ioutil.ReadAll(r.Body)
		assert.NoError(err)
		assert.Equal(c.content, b)
	}
}