Assignment title: Information
from collections import namedtuple
from config import getLogger
from bs4 import BeautifulSoup
import requests
import datetime
import json
from pytz import timezone, utc
from dateutil.parser import parse
from bots import RedditBot
from config.bot_config import CONFIG
# region constants
SUBMISSION_INTERVAL_HOURS = CONFIG['intervals']['submission_interval_hours']
SUBREDDITS = CONFIG['subreddits']
# endregion
title = namedtuple('title', 'date')
BASE_URL = "http://www.upressonline.com/fauevents/"
TABLE_ROW = "{title} | {date} | {description}\n"
HYPERLINK = "[{text}]({url})"
HEADER_DIVIDER = "---|---|----\n"
TABLE_HEADER = TABLE_ROW.format(title='Title', date='Date', description='Description') + HEADER_DIVIDER
logger = getLogger()
"""
To use string formatting on the HYPERLINK template, you have to use keyword arguments, e.g.
link = HYPERLINK.format(text="Click Me", url="http://example.com")
This will let you put links inside your Reddit posts.
"""
class EventBot(RedditBot):
def __init__(self, user_name, *args, **kwargs):
super(EventBot, self).__init__(user_name=user_name, *args, **kwargs)
self.base_url = BASE_URL
self.subreddits = CONFIG.get('subreddits', None) or ['FAUbot']
@staticmethod
def has_event_passed(event_json):
event_dict = EventBot._get_event_dict(event_json)
timestamp = event_dict['date']
full_date = timestamp.replace(" @ ", " ")
dash_idx = full_date.index('-')
date = full_date[:dash_idx - 1]
start_datetime = timezone("US/Eastern").localize(parse(date), is_dst=None).astimezone(utc)
now = utc.localize(datetime.datetime.utcnow()) # get current time in UTC timezone
return now > start_datetime # True if now is after start time
@staticmethod
def _get_event_html():
"""
Makes the HTTP request to the event calendar website.
:return: String containing HTML, or None if the response is not 200 OK.
"""
logger.info("Getting event calendar HTML from {}".format(BASE_URL))
r = requests.get(BASE_URL)
if r.status_code == requests.codes.ok:
data = r.text
return data
logger.warning("Returning None, Response not OK: code={}".format(r.status_code))
return None
@staticmethod
def _get_event_dict(event_json):
"""
Takes the relevant values out of the event JSON and creates a simpler dictionary
that is used to format the string TABLE_ROW.
:param event_json: JSON stripped from the event's data-tribejson HTML attribute.
:type event_json: str
:return: A dict containing the relevant event data
"""
event_dict = json.loads(event_json)
return {'title': HYPERLINK.format(text=event_dict['title'], url=event_dict['permalink']),
'date': event_dict['dateDisplay'],
'description': event_dict['excerpt'][3:-4] or "None provided",}
@staticmethod
def _make_reddit_table(html):
"""
Scrapes event data from HTML and creates a Reddit table with it.
:param html: HTML from the event website
:type data: str
:return: A single string containing a Reddit markdown table
"""
logger.info("Generating reddit table")
# start with the header, and append a new row for each event
table = TABLE_HEADER
soup = BeautifulSoup(html, "html.parser")
for event in soup.find_all('div', attrs={'data-tribejson': True}):
event_json = event.get('data-tribejson')
event_dict = EventBot._get_event_dict(event_json)
if EventBot.has_event_passed(event_json) is False:
table += TABLE_ROW.format(**event_dict)
return table
def get_reddit_table(self):
"""
Uses all the helper functions to get the HTML, scrape it, and generate a Reddit table.
:return: A single string containing a Reddit markdown table, or None if an error happens.
"""
table = None
html = self._get_event_html()
if html:
table = EventBot._make_reddit_table(html)
else:
logger.error("Table could not be generated.")
return table
def is_already_submitted(self, title, subreddit):
"""
Checks if a URL has already been shared on self.subreddit.
Because praw.Reddit.search returns a generator instead of a list,
we have to actually loop through it to see if the post exists.
If no post exists, the loop won't happen and it will return False.
:param url: The url that will be searched for
:param subreddit: The subreddit where the url will be searched for
:return: True if the url has already been posted to the subreddit
"""
for title in self.r.search("title:"+title, subreddit=subreddit):
if title:
return True
return False
def edit_existing_table(self):
html = self._get_event_html()
table = self._make_reddit_table(html)
return table
def create_new_table(self):
html = self._get_event_html()
table = self._make_reddit_table(html)
return table
def submit_table(self, title_tuple):
"""
Submit a link to Reddit, and save the submission time to the database.
:param link_tuple: A namedtuple with a url and a title.
"""
for subreddit in self.subreddits:
if self.is_already_submitted(title_tuple.title, subreddit):
logger.info("Table already submitted: subreddit=[{}], title=[{}]".format(subreddit, title_tuple.title))
table = self.edit_exisiting_table()
# sleep for shorter time if time to submit but random article was already submitted
self.sleep_interval = 5
print(table)
else:
logger.info("Submitting link: subreddit=[{}], url=[{}]".format(subreddit, title_tuple.title))
self.r.submit(subreddit, title_tuple.title, url=title_tuple.url)
table = self.create_new_table()
print(table)
def do_scheduled_submit(self,event_json):
"""
Check if enough time has passed since the last submission. If it has, submit a new link and save the current
submission time. This is the NewsBot's main logic function.
"""
if self.is_time_to_submit():
event_dict = EventBot._get_event_dict(event_json)
title = event_dict['title']
if title:
self.submit_table(title)
else:
logger.info("No articles have been published yet today.")
else:
logger.info("Not time to submit.")
def is_time_to_submit(self):
"""
Check if enough time has passed to submit another article.
This function checks the creation time of FAUbot's newest submissions. If at least 24 hours has passed since the
last article submission, it is time to submit a new article. The 24 hour interval is configurable in
config/bot_config.yaml.
:return: True if enough time has passed for a new article to be submitted.
"""
is_time = True
me = self.r.get_me()
now = datetime.datetime.utcnow()
target_interval = datetime.timedelta(hours=SUBMISSION_INTERVAL_HOURS)
logger.info("Checking if time to submit: targetInterval=[{}]".format(target_interval))
for post in me.get_submitted(sort="new", time="day"):
if post.url.startswith(self.base_url):
created = datetime.datetime.utcfromtimestamp(post.created_utc)
difference = now - created
if difference < target_interval:
logger.info("Not time to submit: currentTime=[{}], lastSubmissionTime=[{}], "
"difference=[{:5.2f} hrs]".format(now, created, (difference.seconds/60)/60))
is_time = False
break
else:
logger.info("Time to submit article. currentTime=[{}]".format(now))
return is_time
def work(self):
table = self.get_reddit_table()
self.submit_table(title)
def main():
test = EventBot(RedditBot, run_once=True) # the bot will only do one loop if you set that to True
test.work()
if __name__ == '__main__':
main()
>