-
Notifications
You must be signed in to change notification settings - Fork 0
/
script.py
147 lines (112 loc) · 5 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# -*- coding: utf-8 -*-
"""
Script that scrapes rss feed and sends updates via console or email.
If desired it sends emails to entered recipients over the SimplifiedGmailApi.
"""
import json
import logging
import os
import sys
import feedparser
import requests
# Setup the Gmail API - set USE_GMAIL True if you want to use the Simplified Gmail API
USE_GMAIL = False

# Paths for important directories and files - from home directory (Cron Job Optimization)
HOME_DIR = os.path.expanduser('~')
# change this to the directory your script is: !!!!!!!!!!!!!!!!!
# os.path.join is variadic - no need to nest a second join call.
DIR_OF_SCRIPT = os.path.join(HOME_DIR, "Documents", "SelfhostedIliasRssReader")
# GitHubBeta/SelfhostedIliasRssReader

# Paths to important files
PATH_FOR_LOG = os.path.join(DIR_OF_SCRIPT, "script.log")  # log output of this script
PATH_OF_DATA_FILE = os.path.join(DIR_OF_SCRIPT, 'data.json')  # date of last seen entry
PATH_OF_CREDENTIALS_FILE = os.path.join(DIR_OF_SCRIPT, 'credentials.json')  # feed URL + login
PATH_OF_CSS_FILE = os.path.join(DIR_OF_SCRIPT, 'main.css')  # stylesheet inlined into emails
PATH_OF_HTML_CHECK_FILE = os.path.join(DIR_OF_SCRIPT, 'check.html')  # copy of generated email

logging.basicConfig(filename=PATH_FOR_LOG, level=logging.DEBUG)
# Read the personal feed URL and the HTTP credentials for the RSS feed
# from the credentials file.
if os.path.isfile(PATH_OF_CREDENTIALS_FILE):
    with open(PATH_OF_CREDENTIALS_FILE) as json_file:
        DATA = json.load(json_file)
    # get information from file
    URL = DATA["url"]
    USER_NAME = DATA["username"]
    PASSWORD = DATA["password"]
    # read recipients if email is activated
    if USE_GMAIL:
        RECIPIENTS = DATA["recipients"]
else:
    # No credentials file: report on stderr and exit non-zero so a cron
    # job or monitor sees the failure (bare sys.exit() exits with 0,
    # i.e. it would report success on a fatal configuration error).
    print("ERROR: No credentials and url was found!", file=sys.stderr)
    sys.exit(1)
# Get current RSS content.
# timeout keeps a stalled server from hanging the cron job forever;
# raise_for_status turns an HTTP error (e.g. 401 on bad credentials)
# into an exception instead of silently feeding an error page to the parser.
CONTENT = requests.get(URL, auth=(USER_NAME, PASSWORD), timeout=30)
CONTENT.raise_for_status()

# Parse RSS content
PARSED_CONTENT = feedparser.parse(CONTENT.content)

# Guard against an empty feed - the date comparisons below index entry 0.
if not PARSED_CONTENT['entries']:
    print("feed contains no entries")
    sys.exit()

# Check if the script ever ran before (the data file stores the published
# date of the newest entry seen on the previous run).
if os.path.isfile(PATH_OF_DATA_FILE):
    with open(PATH_OF_DATA_FILE) as json_file:
        DATA = json.load(json_file)
    # get the saved date from this file
    LATEST_DATE = DATA['date']
    print("Date of last update: " + DATA['date'])
    print("Current date of newest entry: " +
          PARSED_CONTENT['entries'][0]['published'])
    # fast check: if the newest entry is unchanged there is nothing new - quit
    if DATA['date'] == PARSED_CONTENT['entries'][0]['published']:
        print("no new articles")
        sys.exit()
else:
    LATEST_DATE = "no latest date"

# Save the published date of the latest entry in a file
with open(PATH_OF_DATA_FILE, 'w') as outfile:
    json.dump({"date": PARSED_CONTENT['entries'][0]['published']}, outfile)
# Walk the feed newest-first, collecting every entry published after the
# one remembered from the previous run, and render each as an HTML fragment.
NEW_ENTRY_TITLES = []
HTML_FRAGMENTS = []
for feed_entry in PARSED_CONTENT['entries']:
    # Stop as soon as we reach the entry that was already seen last time.
    if LATEST_DATE == feed_entry['published']:
        print("Newest entry detected!!!")
        break
    NEW_ENTRY_TITLES.append(feed_entry['title'])
    HTML_FRAGMENTS.append('<div class="entry"><div class="size"><p class="title">'
                          + feed_entry['title'] + '</p><p class="date">'
                          + feed_entry['published'][:-6] + "</p></div>")
    HTML_FRAGMENTS.append('<p class="content">'
                          + feed_entry['summary_detail']['value'] + '</p>')
    HTML_FRAGMENTS.append('<a href="' + feed_entry['link']
                          + '"><button class="link-button">Link to content</button></a></div>')
COUNT = len(NEW_ENTRY_TITLES)
WALKING_RSS_STRING = "".join(HTML_FRAGMENTS)
WALKING_RSS_STRING_TITLES = "".join(title + ", " for title in NEW_ENTRY_TITLES)
# if more than one new entry was found add a list of the entry topics
if COUNT > 1:
    WALKING_RSS_STRING = ('<p class="hide">Topics: '
                          + WALKING_RSS_STRING_TITLES[:-2] + ' >> </p>'
                          + WALKING_RSS_STRING)
print(WALKING_RSS_STRING)
# Send an email with the new RSS entries if desired.
if USE_GMAIL:
    # Gmail imports (not needed for the crawler itself, so only loaded here)
    from SendGmailSimplified.SendGmailSimplified import SimplifiedGmailApi
    from premailer import transform

    # Read the stylesheet that gets embedded into the email markup.
    with open(PATH_OF_CSS_FILE, 'r') as stylesheet:
        css_rules = stylesheet.read().rstrip("\n")

    # Wrap the rendered entries in a complete HTML document, then let
    # premailer inline the CSS so email clients display it correctly.
    EMAIL_TEXT = ('<!DOCTYPE html><html><head><style type="text/css">' + css_rules +
                  "</style></head><body>" + WALKING_RSS_STRING + "</body></html>")
    EMAIL_TEXT = transform(EMAIL_TEXT).replace("\n", "")

    # Keep a copy of the exact message on disk for manual inspection.
    with open(PATH_OF_HTML_CHECK_FILE, 'w') as outfile:
        outfile.write(EMAIL_TEXT)

    # Locate the Simplified Gmail API configuration files.
    DIR_OF_GMAIL_API_FILES = os.path.join(DIR_OF_SCRIPT, "SendGmailSimplified",
                                          "gmail_api_files")
    PATH_OF_CLIENT_DATA = os.path.join(DIR_OF_GMAIL_API_FILES, "client_data.json")
    PATH_OF_CLIENT_SECRET = os.path.join(DIR_OF_GMAIL_API_FILES, "client_secret.json")
    GMAIL_SERVER = SimplifiedGmailApi(PATH_OF_CLIENT_DATA, PATH_OF_CLIENT_SECRET,
                                      DIR_OF_GMAIL_API_FILES)

    # Deliver the rendered update to every configured recipient.
    for mail_recipient in RECIPIENTS:
        GMAIL_SERVER.send_html(mail_recipient,
                               "Ilias RSS Update (" + str(COUNT) + ")",
                               EMAIL_TEXT)