-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper_test.go
89 lines (76 loc) · 2.96 KB
/
scraper_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package main
import (
"os"
"reflect"
"testing"
)
// TestCleaner runs cleaner over a table of inputs and reports every case
// whose output differs from the expected string.
func TestCleaner(t *testing.T) {
	// cleanerCase pairs an input string with the output cleaner should produce.
	type cleanerCase struct {
		input    string
		expected string
	}

	cases := []cleanerCase{
		// A single "[edit]" marker is expected to be stripped.
		{"This is a [edit] test", "This is a test"},
		// Several markers in a row are all expected to be stripped.
		{"[edit] [edit] [edit]", " "},
		// A marker in the middle of a sentence.
		{"No [edit] here", "No here"},
		// Matching is expected to be case-sensitive: "[Edit]" stays.
		{"No [Edit] here", "No [Edit] here"},
		// A partial look-alike such as "[ed]it" is expected to stay.
		{"This is a [ed]it test", "This is a [ed]it test"},
	}

	for i := range cases {
		tc := cases[i]
		if got := cleaner(tc.input); got != tc.expected {
			t.Errorf("Input: %q, Expected: %q, Got: %q", tc.input, tc.expected, got)
		}
	}
}
// No test case was built for scrapePage.
// Some examples of setting up test servers are available in
// Saha, Amit. 2022. Practical Go: Building Scalable Network & Non-network Applications. New York: Wiley.
// Instead, I tested against live wiki pages and ensured the returned results were as expected.
// TestReadUrl writes a list of URLs (one per line) to a temporary file and
// checks that readUrl returns them as a slice in the same order.
func TestReadUrl(t *testing.T) {
	// Sample input file contents: one URL per line, no trailing newline.
	urlsContent := `https://en.wikipedia.org/wiki/Robotics
https://en.wikipedia.org/wiki/Robot
https://en.wikipedia.org/wiki/Reinforcement_learning
https://en.wikipedia.org/wiki/Robot_Operating_System
https://en.wikipedia.org/wiki/Intelligent_agent
https://en.wikipedia.org/wiki/Software_agent
https://en.wikipedia.org/wiki/Robotic_process_automation
https://en.wikipedia.org/wiki/Chatbot
https://en.wikipedia.org/wiki/Applications_of_artificial_intelligence
https://en.wikipedia.org/wiki/Android_(robot)`

	// Create the fixture inside t.TempDir() rather than the working directory:
	// the file gets a unique name, cannot clobber a real test_urls.txt, and the
	// testing package removes the directory when the test finishes.
	f, err := os.CreateTemp(t.TempDir(), "test_urls-*.txt")
	if err != nil {
		t.Fatalf("Error creating temporary file: %v", err)
	}
	if _, err := f.WriteString(urlsContent); err != nil {
		_ = f.Close() // best effort; the write error is the one worth reporting
		t.Fatalf("Error writing temporary file: %v", err)
	}
	// Close before reading so the contents are flushed and the handle is released.
	if err := f.Close(); err != nil {
		t.Fatalf("Error closing temporary file: %v", err)
	}

	// Call the function under test on the fixture.
	urls, err := readUrl(f.Name())
	if err != nil {
		t.Fatalf("Error reading URLs from file: %v", err)
	}

	// The URLs readUrl is expected to return, in file order.
	expectedURLs := []string{
		"https://en.wikipedia.org/wiki/Robotics",
		"https://en.wikipedia.org/wiki/Robot",
		"https://en.wikipedia.org/wiki/Reinforcement_learning",
		"https://en.wikipedia.org/wiki/Robot_Operating_System",
		"https://en.wikipedia.org/wiki/Intelligent_agent",
		"https://en.wikipedia.org/wiki/Software_agent",
		"https://en.wikipedia.org/wiki/Robotic_process_automation",
		"https://en.wikipedia.org/wiki/Chatbot",
		"https://en.wikipedia.org/wiki/Applications_of_artificial_intelligence",
		"https://en.wikipedia.org/wiki/Android_(robot)",
	}

	// Compare the whole slice at once: both length and order must match.
	if !reflect.DeepEqual(urls, expectedURLs) {
		t.Errorf("Unexpected URLs. Expected: %+v, Got: %+v", expectedURLs, urls)
	}
}
// BenchmarkScrapePage calls scrapePage b.N times on a single Wikipedia URL,
// passing the same result map on every call.
//
// NOTE(review): presumably scrapePage fetches the URL over the network, in
// which case the timings include network latency — confirm before comparing runs.
func BenchmarkScrapePage(b *testing.B) {
	const target = "https://en.wikipedia.org/wiki/Robotics"

	// One map is shared by all iterations.
	results := map[string]ScrapedData{}

	for remaining := b.N; remaining > 0; remaining-- {
		scrapePage(target, results)
	}
}