-
Notifications
You must be signed in to change notification settings - Fork 48
/
sitediff.example.yaml
81 lines (71 loc) · 2.48 KB
/
sitediff.example.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Other configuration files to include. Their contents are merged with
# the rules defined in this file.
includes:
  - extra-rules.yaml
# Settings.
#
# If you use "sitediff init" with the right parameters, it will generate
# this section for you.
settings:
  # Crawl 2 levels deep.
  depth: 2
  # Wait for 250ms between requests.
  interval: 250
  # Make only 1 request at a time - no simultaneous requests.
  # Concurrency has to be 1 when an interval is set.
  concurrency: 1
  # Regular expression for links to skip: don't follow links to PDF files.
  exclude: '.*\.pdf'
  # Curl options, if any. Each entry is nested under curl_opts.
  curl_opts:
    # NOTE(review): presumably a download rate cap handed to curl
    # (bytes per second) - confirm against the curl option of this name.
    max_recv_speed_large: 10000
# Rules under this element apply only to the 'before' site.
before:
  # URL of the 'before' version of the site.
  url: http://localhost/old
  # Sanitizations and DOM transformations use the same rule format as the
  # general (top-level) ones, but here they are applied only to the
  # 'before' site.
  dom_transform:
    # Remove every div carrying the 'updates-required' class.
    - title: Example
      type: remove
      selector: div.updates-required
# Rules under this element apply only to the 'after' site.
after:
  # URL of the 'after' version of the site.
  url: http://localhost/new
# Root element for the comparison.
#
# By default sitediff compares the HTML of the whole page. To restrict the
# comparison to part of the page, give a selector here. The value below
# limits the comparison to the body, so the HTML head is ignored.
selector: body
# General regular expression rules, applied to both versions of the site.
# Each list item is one rule: text matching 'pattern' is replaced with
# 'substitute'.
sanitization:
  # Normalize input tags containing random tokens.
  - title: Remove form-build-id
    pattern: '<input type="hidden" name="form_build_id" value="form-[a-zA-Z0-9_-]+" *\/?>'
    substitute: '<input type="hidden" name="form_build_id" value="__form_build_id__">'
  # Replace meta property="twitter:*" with meta name="twitter:*".
  - title: Meta 'property' changed to 'name'
    pattern: 'property="twitter:'
    substitute: 'name="twitter:'
    # 'selector' limits this rule to only within the selected elements.
    selector: meta
    # 'path' limits this rule to only certain pages.
    path: /user
# General DOM transforms, applied to both versions of the site.
# Each list item is one rule with a 'type' and the 'selector' it acts on.
dom_transform:
  # Remove article elements, replacing them with their content.
  - title: Unwrap article elements
    type: unwrap
    selector: article
  # Strip the classes listed under 'class' from div elements.
  - title: Remove classes bar and baz from divs
    type: remove_class
    selector: div
    class:
      - class-bar
      - class-baz
  # Remove the div whose ID is 'block-time'.
  - title: Remove block containing current time.
    type: remove
    selector: div#block-time