-
Notifications
You must be signed in to change notification settings - Fork 1
/
get-creator-answers-questions-for-arbitrary-time-period.rb
executable file
·148 lines (138 loc) · 4.75 KB
/
get-creator-answers-questions-for-arbitrary-time-period.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env ruby
require 'json'
require 'rubygems'
require 'typhoeus'
require 'awesome_print'
require 'json'
require 'time'
require 'date'
require 'csv'
require 'logger'
logger = Logger.new(STDERR)
logger.level = Logger::DEBUG
def getKitsuneResponse(url, params, logger)
logger.debug url
logger.debug params
try_count = 0
begin
result = Typhoeus::Request.get(
url,
:params => params )
x = JSON.parse(result.body)
rescue JSON::ParserError => e
try_count += 1
if try_count < 4
$stderr.printf("JSON::ParserError exception, retry:%d\n",\
try_count)
sleep(10)
retry
else
$stderr.printf("JSON::ParserError exception, retrying FAILED\n")
x = nil
end
end
return x
end
if ARGV.length < 6
puts "usage: #{$0} yyyy mm dd end-yyyy mm id"
exit
end
# because of issue 3686, https://github.com/mozilla/kitsune/issues/3686,
# go back one day and forward one day
created_time = Time.gm(ARGV[0].to_i, ARGV[1].to_i, ARGV[2].to_i)
greater_than_time = (created_time - 3600 * 24).strftime("%Y-%-m-%-e")
less_than = Time.gm(ARGV[3].to_i, ARGV[4].to_i, ARGV[5].to_i)
end_time = Time.gm(ARGV[3].to_i, ARGV[4].to_i, ARGV[5].to_i, 23, 59, 59)
less_than_time = (less_than + 3600 * 24).strftime("%Y-%-m-%-e")
less_than_time_parsed = Time.parse(less_than_time + " 00:00:00 UTC")
logger.debug "created_time" + created_time.to_s
logger.debug "greater than time" + greater_than_time.to_s
logger.debug "less than" + less_than.to_s
logger.debug "less than time" + less_than_time.to_s
url_params = {
:format => "json",
:product => "firefox",
:created__gt => greater_than_time,
:created__lt => less_than_time,
:ordering => "+created",
}
url = "https://support.mozilla.org/api/2/question/"
end_program = false
question_number = 0
issue_3686_offset = 7 * 3600 # 7 hours off
csv = []
while !end_program
sleep(1.0) # sleep 1 second between API calls
questions = getKitsuneResponse(url, url_params, logger)
url = questions["next"]
if url.nil?
logger.debug "nil next url"
else
logger.debug "next url:" + url
end
url_params = nil
questions["results"].each do|q|
logger.ap q
#logger.debug "question:" + q.ai(options = {html: true})
updated = q["updated"]
logger.debug "created from API:" + q["created"] + "<-- this is PST not UTC despite the 'Z'"
# All times returned by the API are in PST not PDT and not UTC
# All URL parameters for time are also in PST not UTC
# See https://github.com/mozilla/kitsune/issues/3961 and
# https://github.com/mozilla/kitsune/issues/3946
# The above may change in the future if we migrate the Kitsune database to UTC
created = Time.parse(q["created"].gsub("Z", "PST"))
logger.debug "created with PST correction:" + created.to_s
if !updated.nil?
logger.debug "updated from API:" + updated + "<-- this is PST not UTC despite the 'Z'"
updated = Time.parse(q["updated"].gsub("Z", "PST"))
logger.debug "updated with PST correction:" + updated.to_s
end
id = q["id"]
logger.debug "QUESTION id:" + id.to_s
question_number += 1
logger.debug "QUESTION number:" + question_number.to_s
tags = q["tags"]
tag_str = ""
tags.each { |t| tag_str = tag_str + t["slug"] + ";" }
answers = q["answers"]
answers_str = ""
answers.each { |a| answers_str = answers_str + a.to_s + ";" }
creator = q["creator"]["username"]
logger.debug 'createdtop:' + created.to_i.to_s
logger.debug 'created_time:' + created_time.to_i.to_s
logger.debug 'less_than_time_parsed:' + less_than_time_parsed.to_i.to_s
logger.debug 'answers_str:' + answers_str
logger.debug 'creator:' + creator
if created.to_i >= created_time.to_i && created.to_i <= end_time.to_i
logger.debug "NOT skipping"
csv.push(
[
id, created.to_s, q["updated"].to_s, q["title"], q["content"].tr("\n"," "),
tag_str, q["product"], q["topic"], q["locale"],
answers_str, creator
])
else
logger.debug "SKIPPING"
end
if !url.nil?
logger.debug 'url:' + url
else
logger.debug 'url is nil'
end
logger.debug 'created:' + q["created"].to_i.to_s
logger.debug'created_time:' + created_time.to_i.to_s
if (created.to_i < created_time.to_i) || url.nil?
end_program = true
break
end
end
end
headers = ['id', 'created', 'updated', 'title', 'content', 'tags', 'product', 'topic',
'locale', 'answers', 'creator']
FILENAME = sprintf("%4.4d-%2.2d-%2.2d-%4.4d-%2.2d-%2.2d-firefox-creator-answers-desktop-all-locales.csv",
ARGV[0].to_i, ARGV[1].to_i, ARGV[2].to_i,
ARGV[3].to_i, ARGV[4].to_i, ARGV[5].to_i)
CSV.open(FILENAME, "w", write_headers: true, headers: headers) do |csv_object|
csv.each {|row_array| csv_object << row_array }
end