-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.go
61 lines (56 loc) · 3.28 KB
/
model.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package gocdx
import "time"
// Record is the in-memory form of one entry (one line) of a CDX file.
//
// The struct is split into two groups. The first group holds the eleven
// columns of the default "CDX N b a m s k r M S V g" layout; their JSON keys
// are always emitted. The second group holds the remaining columns; each of
// those carries `omitempty`, so a zero value ("" or 0) is left out when the
// record is marshaled to JSON.
type Record struct {
	// Default columns. The trailing letter is the CDX field code.
	MassagedURL          string    `json:"massaged_url"`           // N
	Timestamp            time.Time `json:"timestamp"`              // b
	OriginalURL          string    `json:"original_url"`           // a
	MIMEType             string    `json:"mime_type"`              // m
	StatusCode           int       `json:"status_code"`            // s
	NewStyleChecksum     string    `json:"new_style_checksum"`     // k
	Redirect             string    `json:"redirect"`               // r
	MetaTags             string    `json:"meta_tags"`              // M
	CompressedRecordSize int64     `json:"compressed_record_size"` // S
	CompressedArcOffset  int64     `json:"compressed_arc_offset"`  // V
	Filename             string    `json:"filename"`               // g

	// Optional columns, upper-case field codes.
	// NOTE(review): the letters on the optional columns are matched by field
	// name against the published CDX legend; nothing in this declaration
	// binds them, so confirm against the parser that fills the struct.
	CanonizedURL          string `json:"canonized_url,omitempty"`            // A
	NewsGroup             string `json:"news_group,omitempty"`               // B
	RulespaceCategory     string `json:"rulespace_category,omitempty"`       // C
	CompressedDatOffset   int64  `json:"compressed_dat_offset,omitempty"`    // D
	CanonizedFrame        string `json:"canonized_frame,omitempty"`          // F
	LanguageDescription   string `json:"language_description,omitempty"`     // G
	CanonizedHost         string `json:"canonized_host,omitempty"`           // H
	CanonizedImage        string `json:"canonized_image,omitempty"`          // I
	CanonizedJumpPoint    string `json:"canonized_jump_point,omitempty"`     // J
	FBISChangedThing      string `json:"fbis_changed_thing,omitempty"`       // K
	CanonizedLink         string `json:"canonized_link,omitempty"`           // L
	CanonizedPath         string `json:"canonized_path,omitempty"`           // P
	LanguageString        string `json:"language_string,omitempty"`          // Q
	CanonizedRedirect     string `json:"canonized_redirect,omitempty"`       // R
	Uniqueness            string `json:"uniqueness,omitempty"`               // U
	CanonizedURLOtherHref string `json:"canonized_url_other_href,omitempty"` // X
	CanonizedURLOtherSrc  string `json:"canonized_url_other_src,omitempty"`  // Y
	CanonizedURLScript    string `json:"canonized_url_script,omitempty"`     // Z

	// Optional columns, lower-case field codes (same caveat as above).
	OldStyleChecksum      string `json:"old_style_checksum,omitempty"`      // c
	UncompressedDatOffset int64  `json:"uncompressed_dat_offset,omitempty"` // d
	IP                    string `json:"ip,omitempty"`                      // e
	Frame                 string `json:"frame,omitempty"`                   // f
	OriginalHost          string `json:"original_host,omitempty"`           // h
	Image                 string `json:"image,omitempty"`                   // i
	OriginalJumpPoint     string `json:"original_jump_point,omitempty"`     // j
	Link                  string `json:"link,omitempty"`                    // l
	ArcDocumentLength     int64  `json:"arc_document_length,omitempty"`     // n
	Port                  int    `json:"port,omitempty"`                    // o
	OriginalPath          string `json:"original_path,omitempty"`           // p
	Title                 string `json:"title,omitempty"`                   // t
	UncompressedArcOffset int64  `json:"uncompressed_arc_offset,omitempty"` // v
	URLOtherHref          string `json:"url_other_href,omitempty"`          // x
	URLOtherSrc           string `json:"url_other_src,omitempty"`           // y
	URLScript             string `json:"url_script,omitempty"`              // z
}
// FieldIndex associates a single-byte field code with an integer index for
// the fields of a CDX file.
//
// NOTE(review): presumably the key is a CDX field letter (as listed in
// DefaultFields) and the value is that field's column position within a
// line; confirm against the code that builds and reads the map.
type FieldIndex map[byte]int
// DefaultFields lists the field codes of the default CDX layout
// ("N b a m s k r M S V g") in column order. Each entry is annotated with
// the Record field that carries the same code.
var DefaultFields = []byte{
	'N', // MassagedURL
	'b', // Timestamp
	'a', // OriginalURL
	'm', // MIMEType
	's', // StatusCode
	'k', // NewStyleChecksum
	'r', // Redirect
	'M', // MetaTags
	'S', // CompressedRecordSize
	'V', // CompressedArcOffset
	'g', // Filename
}