-
Notifications
You must be signed in to change notification settings - Fork 0
/
ex_15_8_twitterspider2rships.py
109 lines (91 loc) · 3.79 KB
/
ex_15_8_twitterspider2rships.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import urllib.request, urllib.parse, urllib.error
import twurl # remember to put the hidden.py file in the same folder
import json
import sqlite3
import ssl
# Twitter REST endpoint that this script queries for an account's friends list.
TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

# This script keeps its data in its own database file (friends.sqlite).
conn = sqlite3.connect('friends.sqlite')
cur = conn.cursor()

# Create the tables with primary keys and constraints (no-op if they exist):
#   People  - one row per account: integer primary key, unique name, and a
#             retrieved flag.
#   Follows - (from_id, to_id) pairs, each pair stored at most once.
for _ddl in (
    '''CREATE TABLE IF NOT EXISTS People
(id INTEGER PRIMARY KEY, name TEXT UNIQUE, retrieved INTEGER)''',
    '''CREATE TABLE IF NOT EXISTS Follows
(from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''',
):
    cur.execute(_ddl)

# Ignore SSL certificate errors.  The order matters: hostname checking must
# be switched off before the verify mode can be lowered to CERT_NONE.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
'''When we have a logical key for a person (i.e., account name)
and we need the id value for the person, depending on whether or
not the person is already in the People table we either need to:
(1) look up the person in the People table and retrieve the id value for the person
or (2) add the person to the People table and get the id value for the newly added row.
'''
def _lookup_or_add_person(name):
    """Resolve an account name to its People.id, inserting a row if needed.

    Looks the name up in the People table first; when it is absent, a new
    row is inserted with retrieved=0 and committed so later lookups see it.

    Args:
        name: The account's screen name (the logical key of People).

    Returns:
        A ``(person_id, created)`` tuple, where ``created`` is True when the
        row was inserted by this call, or ``None`` when the insert did not
        add exactly one row (an error message is printed in that case).
    """
    cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1', (name, ))
    row = cur.fetchone()
    if row is not None:
        return row[0], False

    cur.execute('''INSERT OR IGNORE INTO People (name, retrieved) VALUES (?, 0)''', (name, ))
    conn.commit()  # so that the next searches can find it
    if cur.rowcount != 1:
        print('Error inserting account:', name)
        return None
    return cur.lastrowid, True  # lastrowid is the primary key SQL assigned


# Main crawl loop: each pass retrieves one account's friends list and records
# the accounts plus the "follows" relationships in the database.
while True:
    acct = input('Enter a Twitter account, or quit: ')
    if acct == 'quit':
        break

    if not acct:
        # Blank input: pick any account that has not been retrieved yet.
        cur.execute('SELECT id, name FROM People WHERE retrieved=0 LIMIT 1')
        row = cur.fetchone()
        if row is None:
            print('No unretrieved Twitter accounts found')
            continue
        acct_id, acct = row
    else:
        found = _lookup_or_add_person(acct)
        if found is None:
            continue
        acct_id = found[0]

    # NOTE(review): twurl.augment comes from the local twurl module; it
    # presumably adds the signed OAuth parameters to the URL - confirm there.
    url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '100'})
    print('Retrieving account', acct)
    try:
        connection = urllib.request.urlopen(url, context=ctx)
    except Exception as err:
        print('Failed to Retrieve', err)
        break

    # Close the HTTP response as soon as the body and header have been read.
    with connection:
        data = connection.read().decode()
        # getheader() matches the header name case-insensitively and falls
        # back to a placeholder instead of raising when it is absent.
        remaining = connection.getheader('x-rate-limit-remaining', 'unknown')
    print('Remaining', remaining)

    try:  # this will fail when there is a problem with the json string
        js = json.loads(data)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        print('Unable to parse json')
        print(data)
        break

    # Debugging
    # print(json.dumps(js, indent=4))

    # The JSON can be well-formed and still lack the 'users' key; dump it so
    # the problem can be inspected, then go back to the prompt.
    if not isinstance(js, dict) or 'users' not in js:
        print('Incorrect JSON received')
        print(json.dumps(js, indent=4))
        continue

    cur.execute('UPDATE People SET retrieved=1 WHERE name = ?', (acct, ))

    countnew = 0
    countold = 0
    for u in js['users']:
        friend = u['screen_name']
        print(friend)
        found = _lookup_or_add_person(friend)
        if found is None:
            continue
        friend_id, created = found
        if created:
            countnew += 1
        else:
            countold += 1
        # Insert the row that captures the "follows" relationship; the UNIQUE
        # constraint together with OR IGNORE keeps duplicates out.
        cur.execute('''INSERT OR IGNORE INTO Follows (from_id, to_id)VALUES (?, ?)''', (acct_id, friend_id))

    print('New accounts=', countnew, ' revisited=', countold)
    print('Remaining', remaining)
    conn.commit()

cur.close()
conn.close()