Skip to content
This repository has been archived by the owner on Jun 9, 2022. It is now read-only.

Commit

Permalink
Merge pull request #58 from RadioRevolt/cache-external-responses-#55
Browse files Browse the repository at this point in the history
Cache external responses, fixes #55
  • Loading branch information
tobinus authored Aug 17, 2016
2 parents ac38bee + 2d9f3db commit 7630059
Show file tree
Hide file tree
Showing 14 changed files with 158 additions and 74 deletions.
2 changes: 1 addition & 1 deletion calculate_durations.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def main():
logger.info("Collecting episodes...")

all_episodes = list()
es = EpisodeSource(generator.requests)
es = generator.episode_source
es.populate_all_episodes_list()
for show in [all_shows[show_id] for show_id in chosen_shows]:
try:
Expand Down
37 changes: 17 additions & 20 deletions generator/generate_feed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

import copy
from .metadata_sources.skip_episode import SkipEpisode
from . import metadata_sources, set_up_logger
from .episode_source import EpisodeSource
Expand All @@ -10,7 +11,7 @@
from . import settings as SETTINGS
import requests

from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property
from clint.textui import progress

import sys
Expand All @@ -28,6 +29,7 @@ def __init__(self, pretty_xml=False, quiet=False, xslt=None):
self.requests.headers.update({"User-Agent": "podcast-feed-gen"})

self.show_source = ShowSource(self.requests)
self.episode_source = EpisodeSource(self.requests)
self.pretty_xml = pretty_xml
self.re_remove_chars = re.compile(r"[^\w\d]|_")

Expand Down Expand Up @@ -81,14 +83,15 @@ def generate_feed(self, show_id: int, force: bool =True) -> bytes:
str: The RSS podcast feed for the given show_id.
"""
try:
show = self.show_source.shows[show_id]
show = copy.deepcopy(self.show_source.shows[show_id])
except KeyError as e:
raise NoSuchShowError from e

return self._generate_feed(show, skip_empty=not force, enable_skip_show=not force)
return self._generate_feed(show, skip_empty=not force,
enable_skip_show=not force)

def _generate_feed(self, show: Show, skip_empty: bool =True, enable_skip_show: bool =True,
episode_source: EpisodeSource =None) -> bytes:
def _generate_feed(self, show: Show, skip_empty: bool = True,
enable_skip_show: bool = True) -> bytes:
"""Generate RSS feed for the provided show.
This differs from generate_feed in that it accept Show, not show_id, as argument.
Expand All @@ -98,8 +101,6 @@ def _generate_feed(self, show: Show, skip_empty: bool =True, enable_skip_show: b
sources.
skip_empty (bool): Set to true to raise exception if there are no episodes for this show.
enable_skip_show (bool): Skip this show if any Show Metadata source raises SkipShow.
episode_source (EpisodeSource): The EpisodeSource which will be used.
A new one will be created if not given.
Returns:
str: The RSS podcast feed for the given show.
Expand All @@ -111,17 +112,15 @@ def _generate_feed(self, show: Show, skip_empty: bool =True, enable_skip_show: b
show.xslt = self.xslt

# Add episodes
if not episode_source:
episode_source = EpisodeSource(self.requests)
try:
episode_source.episode_list(show)
self.episode_source.episode_list(show)
except NoEpisodesError as e:
if skip_empty:
raise e
else:
# Go on and generate empty feed
pass
show.add_episodes_to_feed(episode_source, self.episode_metadata_sources)
show.add_episodes_to_feed(self.episode_source, self.episode_metadata_sources)

# Generate!
return show.rss_str(minimize=not self.pretty_xml)
Expand All @@ -139,8 +138,7 @@ def generate_feeds_sequence(self, shows) -> dict:

# Ensure we only download list of episodes once
logger.info("Downloading metadata, this could take a while...")
es = EpisodeSource(self.requests)
self._prepare_for_batch(es)
self.prepare_for_batch()

feeds = dict()
for show in progress.bar(shows, hide=self.hide_progressbar):
Expand All @@ -149,17 +147,17 @@ def generate_feeds_sequence(self, shows) -> dict:
logger.debug("{0: <60} ({1:03}/{2:03})".format(show.name, i, num_shows))
try:
# Do the job
feeds[show.id] = self._generate_feed(show, episode_source=es)
feeds[show.id] = self._generate_feed(show)
except (NoEpisodesError, SkipShow):
# Skip this show
pass

logger.info("Done creating the feeds.")
return feeds

def _prepare_for_batch(self, es):
def prepare_for_batch(self):
logger.debug("Preparing for processing multiple shows")
es.populate_all_episodes_list()
self.episode_source.populate_all_episodes_list()
for source in self.episode_metadata_sources:
source.prepare_batch()
for source in self.show_metadata_sources:
Expand Down Expand Up @@ -190,11 +188,10 @@ def populate_show_metadata(self, show, enable_skip_show: bool=True):
def generate_feed_with_all_episodes(self, title=None):
show = Show(name=title or SETTINGS.ALL_EPISODES_FEED_TITLE, id=0)
show.xslt = self.xslt
es = EpisodeSource(self.requests)
self._prepare_for_batch(es)
self.prepare_for_batch()
# Get all episodes
episodes = [es.episode(self.show_source.shows[ep['program_defnr']], ep)
for ep in es.all_episodes if ep['program_defnr'] != 0]
episodes = [self.episode_source.episode(self.show_source.shows[ep['program_defnr']], ep)
for ep in self.episode_source.all_episodes if ep['program_defnr'] != 0]
# Populate metadata
for episode in progress.bar(episodes, hide=self.hide_progressbar):
logger.debug("Populating episode %s (from %s)", episode.title,
Expand Down
2 changes: 1 addition & 1 deletion generator/metadata_sources/base_manual_changes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property
import json
import os.path
import sys
Expand Down
23 changes: 13 additions & 10 deletions generator/metadata_sources/episode/chimera.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,31 @@
import requests
from markdown import Markdown
import urllib.parse
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property
from sys import stderr
from podgen import htmlencode
from threading import RLock


class Chimera(EpisodeMetadataSource):
def __init__(self, *args, **kwargs):
    """Set up per-instance caches used when looking up Chimera episode metadata."""
    super().__init__(*args, **kwargs)
    # Cache of fetched episode metadata, keyed by Chimera show ID.
    self._episodes_by_chimera_id = dict()
    # Markdown converter — presumably used to render episode text as HTML5;
    # verify against populate() (not visible in this view).
    self.markdown = Markdown(output="html5")
    # One lock per Chimera show ID so concurrent threads don't fetch the
    # same show's episode list twice.
    self._episode_list_locks = dict()

def _get_episodes(self, digas_id):
chimera_id = self._shows_by_digas_id[digas_id]
try:
return self._episodes_by_chimera_id[chimera_id]
chimera_id = self._shows_by_digas_id[digas_id]
except KeyError:
self._episodes_by_chimera_id[chimera_id] = self._fetch_episodes(chimera_id)
return self._episodes_by_chimera_id[chimera_id]
# Digas ID not recognized
return []
with self._episode_list_locks.setdefault(chimera_id, RLock()):
try:
return self._episodes_by_chimera_id[chimera_id]
except KeyError:
self._episodes_by_chimera_id[chimera_id] = self._fetch_episodes(chimera_id)
return self._episodes_by_chimera_id[chimera_id]

@cached_property
def _shows_by_digas_id(self):
Expand All @@ -42,11 +49,7 @@ def _fetch_episodes(self, chimera_id):
return {episode['podcast_url']: episode for episode in episodes}

def accepts(self, episode) -> bool:
try:
return super().accepts(episode) and episode.media.url in self._get_episodes(episode.show.id)
except KeyError:
# Show not in Chimera
return False
return super().accepts(episode) and episode.media.url in self._get_episodes(episode.show.id)

def populate(self, episode) -> None:
metadata = self._get_episodes(episode.show.id)[episode.media.url]
Expand Down
2 changes: 1 addition & 1 deletion generator/metadata_sources/episode/manual_changes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from .. import EpisodeMetadataSource
from ..base_manual_changes import BaseManualChanges
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property
import json
import os.path
import sys
Expand Down
2 changes: 1 addition & 1 deletion generator/metadata_sources/show/chimera.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .. import ShowMetadataSource
import requests
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property

ORIG_IMAGE_PREFIX = "http://dusken.no/media/thumbs/uploads/images/"
ORIG_IMAGE_SUFFIX = ".170x170_q85_crop_upscale.jpg"
Expand Down
2 changes: 1 addition & 1 deletion generator/metadata_sources/show/manual_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .. import ShowMetadataSource
from ..base_manual_changes import BaseManualChanges
from ...settings import METADATA_SOURCE
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property
import json
import os.path
import sys
Expand Down
6 changes: 5 additions & 1 deletion generator/show_source.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import datetime

import requests
import requests.auth
from .settings import SHOW_SOURCE as SETTINGS
from .show import Show
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property


class ShowSource:
Expand All @@ -12,10 +14,12 @@ def __init__(self, request_session: requests.Session):
"""
Use the given requests session when fetching data."""
self.requests = request_session
self.last_fetched = None

@cached_property
def shows(self):
    """dict: Dictionary with all shows, with their DigAS ID as key and Show instance as value."""
    # Record when the show list was downloaded, so callers can decide when
    # the cached value has gone stale (cached_property computes this once).
    self.last_fetched = datetime.datetime.now(datetime.timezone.utc)
    return self._get_all_shows()

def _get_all_shows(self) -> dict:
Expand Down
5 changes: 4 additions & 1 deletion server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ def parse_cli_arguments():
help="Activate debugging, overriding the option in webserver/settings.py "
"(you shouldn't use this script in production, but especially not with"
" this option!! You might reveal secret information to others.)")
parser.add_argument("--port", "-p", default=5000, type=int,
help="Port to run the server on.")
parser.add_argument("host", nargs="?", default="127.0.0.1", help="Accept connections for this host. "
"Set to 0.0.0.0 to enable connections from anywhere (not safe!). "
"Defaults to 127.0.0.1, which means only connections from this computer.")
Expand All @@ -17,7 +19,8 @@ def parse_cli_arguments():
if __name__ == '__main__':
parser, args = parse_cli_arguments()
host = args.host
port = args.port

if args.debug:
webserver.feed_server.app.debug = True
webserver.feed_server.app.run(host=host)
webserver.feed_server.app.run(host=host, port=port)
53 changes: 28 additions & 25 deletions webserver/feed_server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import threading

from generator import set_up_logger
import base64

Expand All @@ -15,6 +17,7 @@
import urllib.parse
import os.path
import logging
import datetime


# Set up logging so all log messages include request information
Expand All @@ -30,7 +33,7 @@ def filter(self, record):
record.agent_browser_version = request.user_agent.version
record.agent = request.user_agent.string
else:
record.method = "Outside of request context (before first request?)"
record.method = "Outside of request context"
record.path = ""
record.ip = ""
record.agent_platform = ""
Expand All @@ -40,21 +43,6 @@ def filter(self, record):
return True


class SkipSameUrl(logging.Filter):
    """Logging filter that emits each distinct message at most once per URL.

    Subsequent records carrying a message already seen for the current
    request path are suppressed; outside a request context nothing is
    filtered.
    """

    def __init__(self):
        super().__init__()
        # Maps request path -> set of messages already logged for that path.
        self.paths = dict()

    def filter(self, record):
        if not request:
            # No request context: let everything through.
            return True
        seen_messages = self.paths.setdefault(request.path, set())
        message = record.getMessage()
        if message in seen_messages:
            return False
        seen_messages.add(message)
        return True

# Format the message so that the extra information is outputted
log_formatter = logging.Formatter(fmt="""\
================================================================================
Expand All @@ -73,8 +61,6 @@ def filter(self, record):
)

# Put our filter and formatter to use
logging.getLogger("").addFilter(SkipSameUrl())
logging.getLogger("py.warnings").addFilter(SkipSameUrl())
set_up_logger.rotatingHandler.setFormatter(log_formatter)
set_up_logger.rotatingHandler.addFilter(ContextFilter())

Expand All @@ -94,6 +80,23 @@ def xslt_url():
return url_for('static', filename="style.xsl")


_create_gen_lock = threading.RLock()
_gen = (None, None)


def get_podcast_feed_generator():
    """Return a shared, batch-prepared PodcastFeedGenerator instance.

    The instance is cached at module level and rebuilt once it is older
    than settings.SOURCE_DATA_TTL. Creation and cache access happen under
    a lock so concurrent requests share a single generator.
    """
    global _gen
    with _create_gen_lock:
        cached, expires_at = _gen
        now = datetime.datetime.now(datetime.timezone.utc)
        if cached is not None and now <= expires_at:
            return cached
        logging.info("Creating PodcastFeedGenerator")
        cached = PodcastFeedGenerator(pretty_xml=True, quiet=True, xslt=xslt_url())
        # Download episode lists and metadata up front so per-request
        # feed generation only reads from memory.
        cached.prepare_for_batch()
        _gen = (cached,
                datetime.datetime.now(datetime.timezone.utc) + settings.SOURCE_DATA_TTL)
        return cached


@app.before_request
def ignore_get():
if request.base_url != request.url:
Expand All @@ -107,19 +110,19 @@ def redirect_to_favicon():

@app.route('/all')
def output_all_feed():
gen = PodcastFeedGenerator(quiet=True, xslt=xslt_url(), pretty_xml=True)
gen = get_podcast_feed_generator()
gen.register_redirect_services(get_redirect_sound, get_redirect_article)

feed = gen.generate_feed_with_all_episodes()
return _prepare_feed_response(feed, 10 * 60)
return _prepare_feed_response(feed, datetime.timedelta(minutes=10))


@app.route('/<show_name>')
def output_feed(show_name):
# Replace image so it fits iTunes' specifications
metadata_sources.SHOW_METADATA_SOURCES.append(logo.ReplaceImageURL)
# Make it pretty, so curious people can learn from it
gen = PodcastFeedGenerator(quiet=True, xslt=xslt_url(), pretty_xml=True)
gen = get_podcast_feed_generator()
try:
show, canonical_slug = \
url_service.get_canonical_slug_for_slug(show_name, gen)
Expand All @@ -138,21 +141,21 @@ def output_feed(show_name):
PodcastFeedGenerator.register_redirect_services(get_redirect_sound, get_redirect_article)

feed = gen.generate_feed(show.id)
return _prepare_feed_response(feed, 60 * 60)
return _prepare_feed_response(feed, settings.FEED_TTL)


def _prepare_feed_response(feed, max_age) -> Response:
def _prepare_feed_response(feed, max_age: datetime.timedelta) -> Response:
resp = make_response(feed)
resp.headers['Content-Type'] = 'application/xml'
resp.cache_control.max_age = max_age
resp.cache_control.max_age = int(max_age.total_seconds())
resp.cache_control.public = True
return resp


@app.route('/api/url/<show>')
def api_url_show(show):
try:
return url_for_feed(url_service.create_slug_for(int(show), PodcastFeedGenerator(quiet=True)))
return url_for_feed(url_service.create_slug_for(int(show), get_podcast_feed_generator()))
except (NoSuchShowError, ValueError):
abort(404)

Expand Down
4 changes: 2 additions & 2 deletions webserver/logo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from generator.no_episodes_error import NoEpisodesError
import sys
import traceback
from cached_property import cached_property
from cached_property import threaded_cached_property as cached_property
from clint.textui import progress


Expand Down Expand Up @@ -229,7 +229,7 @@ def create_local_copy_for_all_shows_with_episodes(cls, gen, quiet: bool = False,
for show in shows:
gen.populate_show_metadata(show, False)

episode_source = EpisodeSource(gen.requests)
episode_source = gen.episode_source
episode_source.populate_all_episodes_list()

selected_images = list()
Expand Down
Loading

0 comments on commit 7630059

Please sign in to comment.