parse_settings.py
"""
Module to get setting details between two dates.
To perform a scraper run, use: python parse_settings.py afterdate beforedate
(dates in format (m)m-(d)d-yyyy)
"""
import datetime as dt
import logging
import os
import sys
from typing import List, Optional

import click
import simplejson as json

import scrapers

logger = logging.getLogger()
logging.basicConfig(stream=sys.stdout)


def get_days_between_dates(afterdate: dt.date, beforedate: dt.date) -> List[dt.date]:
    """Return a list of the individual days between two dates, including both endpoints."""
    # Number of whole days between the two dates
    n_days = (beforedate - afterdate).days
    # Return each individual day, including the last one
    return [afterdate + dt.timedelta(days=i) for i in range(n_days + 1)]
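# For example, get_days_between_dates(dt.date(2020, 9, 1), dt.date(2020, 9, 3)) returns
# [dt.date(2020, 9, 1), dt.date(2020, 9, 2), dt.date(2020, 9, 3)]: both endpoints are included.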


def parse_settings_on_cloud(
    afterdate: dt.date,
    beforedate: dt.date,
    write_to_sheets: bool = True,
    scraper: Optional[scrapers.FakeScraper] = None,
):
    """
    Same as `parse_settings()` (see below), but without the command line interface and showbrowser option.

    Outputs scraped results to the gsheet Settings_scheduler if `write_to_sheets` is True.
    """
    if scraper is None:
        scraper = scrapers.TravisScraper()
    logger.info(f"Parsing settings between {afterdate} and {beforedate}.")
    days_to_pull = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    pulled_settings = scraper.make_setting_list(days_to_pull)
    if isinstance(scraper, scrapers.TravisScraper):
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)
    # maybe make this cleaner in SQL? future work
    if write_to_sheets:
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)
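# Example (hypothetical dates; uses the default TravisScraper and skips the Google Sheets write):
#   parse_settings_on_cloud(dt.date(2020, 9, 1), dt.date(2020, 9, 7), write_to_sheets=False)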


def parse_settings(
    afterdate: dt.date,
    beforedate: dt.date,
    outfile: str,
    showbrowser: bool = False,
    county: str = "travis",
):
    """Scrape and return data for all settings between `afterdate` and `beforedate` for the given county."""
    scraper = scrapers.SCRAPER_NAMES[county](headless=not showbrowser)
    days_to_pull = get_days_between_dates(afterdate=afterdate, beforedate=beforedate)
    pulled_settings = scraper.make_setting_list(days_to_pull)
    return pulled_settings
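# Example (hypothetical dates): scrape a single day for Travis County with a visible browser window:
#   parse_settings(dt.date(2020, 9, 1), dt.date(2020, 9, 1), outfile="", showbrowser=True)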


def _parse_and_persist_settings(
    afterdate: dt.date,
    beforedate: dt.date,
    outfile: str = "",
    showbrowser: bool = False,
    db: bool = True,
    gs: bool = True,
    county: str = "travis",
):
    """Scrape settings and optionally persist them to the database, Google Sheets, and/or an outfile."""
    pulled_settings = parse_settings(
        afterdate, beforedate, outfile, showbrowser, county=county
    )
    if db:
        import persist

        for setting in pulled_settings:
            persist.rest_setting(setting)
    # Only write to Google Sheets if credentials exist
    if os.getenv("GOOGLE_SHEETS_CREDS_JSON") is None:
        gs = False
    if gs:
        import gsheet

        gsheet.write_pulled_settings(pulled_settings)
    if outfile:
        json.dump(pulled_settings, outfile, default=dict)
    return pulled_settings
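# Example (hypothetical dates): persist a week of settings to the database only,
# skipping Google Sheets and file output:
#   _parse_and_persist_settings(dt.date(2020, 9, 1), dt.date(2020, 9, 7), gs=False)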


@click.command()
@click.argument(
    "afterdate",
    type=click.DateTime(formats=["%Y-%m-%d", "%m-%d-%Y", "%m/%d/%Y"]),
    nargs=1,
)
@click.argument(
    "beforedate",
    type=click.DateTime(formats=["%Y-%m-%d", "%m-%d-%Y", "%m/%d/%Y"]),
    nargs=1,
)
@click.option("--outfile", type=click.File(mode="w"), required=False)
@click.option(
    "--showbrowser / --headless",
    default=False,
    help="show the browser window while scraping (default is headless)",
)
@click.option(
    "--db / --no-db",
    default=True,
    help="whether to persist data to the database",
)
@click.option(
    "--gs / --no-gs",
    default=True,
    help="whether to persist data to Google Sheets",
)
@click.option(
    "--county",
    type=click.Choice(scrapers.SCRAPER_NAMES, case_sensitive=False),
    default="travis",
)
def parse_and_persist_settings(
    afterdate: dt.date,
    beforedate: dt.date,
    outfile: str = "",
    showbrowser: bool = False,
    db: bool = True,
    gs: bool = True,
    county: str = "travis",
):
    """Click entry point that passes the same values to `_parse_and_persist_settings`, which isn't a Click command."""
    return _parse_and_persist_settings(
        afterdate=afterdate,
        beforedate=beforedate,
        outfile=outfile,
        showbrowser=showbrowser,
        db=db,
        gs=gs,
        county=county,
    )


if __name__ == "__main__":
    parse_and_persist_settings()