forked from mswart/openmensa-parsers
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaachen.py
69 lines (57 loc) · 2.55 KB
/
aachen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!python3
from urllib.request import urlopen
from bs4 import BeautifulSoup as parse
from bs4.element import NavigableString, Tag
from utils import Parser
from pyopenmensa.feed import OpenMensaCanteen, buildLegend
# Module-wide legend for meal note codes. parse_url() assigns it from the
# page's 'additives' element via buildLegend() (passing in the previous
# value), and parse_day() reads it to translate the codes found next to a
# meal name. It stays None until parse_url() has run at least once.
legend = None
def parse_day(canteen, day, data):
    """Add the meals of one day to the canteen feed.

    Args:
        canteen: Feed builder; ``setDayClosed`` and ``addMeal`` are called
            on it.
        day: Day identifier, forwarded unchanged to the canteen methods.
        data: Parsed HTML element that holds the menu of this day.

    If ``data`` contains an element with the id ``note`` the day is marked
    as closed and nothing else happens. Otherwise every ``tr`` inside the
    element with class ``menues`` becomes one meal, built from its
    ``menue-category``, ``menue-desc`` and ``menue-price`` spans.

    Note codes are looked up in the module-level ``legend``; codes without
    an entry are kept as they are.
    """
    # 1. menus
    if data.find(id='note'):
        canteen.setDayClosed(day)
        return

    # Fall back to the raw codes when no legend has been built yet.
    known_notes = legend or {}

    for menu in data.find(attrs={'class': 'menues'}).find_all('tr'):
        # category:
        category = menu.find('span', attrs={'class': 'menue-category'}).text.strip()

        # split name and notes: plain text nodes form the name, nested tags
        # carry comma separated note codes
        name_parts = []
        codes = []
        for part in menu.find('span', attrs={'class': 'menue-desc'}).children:
            # The exact type checks are kept: isinstance() would also accept
            # NavigableString subclasses (e.g. bs4 Comment) and thereby
            # change which text ends up in the meal name.
            if type(part) is NavigableString:
                name_parts.append(part.string)
            elif type(part) is Tag:
                for code in part.text.split(','):
                    code = code.strip()
                    # Drop empty fragments and duplicates while keeping the
                    # order of first appearance, so the output is stable
                    # between runs.
                    if code and code not in codes:
                        codes.append(code)
        name = ''.join(name_parts).strip()
        notes = [known_notes.get(code, code) for code in codes]

        # price:
        price = menu.find('span', attrs={'class': 'menue-price'}).text.strip()

        # store data
        canteen.addMeal(day, category, name, notes, price)
# Pattern handed to buildLegend(): a code made of digits and/or capital
# letters, a closing parenthesis, optional whitespace and then the
# description (named groups ``name`` and ``value``).
# Raw strings keep the backslashes without relying on invalid escape
# sequences; the resulting pattern text is unchanged.
LEGEND_REGEX = (
    r'(?P<name>(\d|[A-Z])+)\)\s*'
    r'(?P<value>\w+((\s+\w+)*[^0-9)]))'
)


def parse_url(url, today=False):
    """Download one canteen page and return its menu as an OpenMensa feed.

    Args:
        url: Address of the page to download and parse.
        today: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``OpenMensaCanteen.toXMLFeed()`` after all days listed
        below have been processed.

    Side effects:
        Rebinds the module-level ``legend`` to the legend built from the
        page's ``additives`` element.

    NOTE(review): a day whose container or headline link is missing from the
    page is not skipped; the attribute access on the missing element raises.
    """
    global legend

    canteen = OpenMensaCanteen()
    # TODO: should only apply to: Tellergericht, vegetarisch, Klassiker,
    # Empfehlung des Tages
    canteen.setAdditionalCharges('student', {'other': 1.5})

    # Close the HTTP response as soon as the body has been read.
    with urlopen(url) as response:
        document = parse(response.read())

    legend = buildLegend(legend, document.find(id='additives').text,
                         regex=LEGEND_REGEX)

    # Element ids of the day containers: current week, then next week.
    days = ('montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag',
            'montagNaechste', 'dienstagNaechste', 'mittwochNaechste',
            'donnerstagNaechste', 'freitagNaechste')
    for day in days:
        data = document.find('div', id=day)
        # The text of the link pointing at the day's container is what gets
        # passed on as the day identifier.
        headline = document.find('a', attrs={'data-anchor': '#' + day})
        parse_day(canteen, headline.text, data)

    return canteen.toXMLFeed()
# Canteens served by this module as (canteen name, page suffix) pairs, in
# registration order.
_CANTEEN_PAGES = (
    ('academica', 'academica-w.html'),
    ('ahorn', 'ahornstrasse-w.html'),
    ('templergraben', 'templergraben-w.html'),
    ('bayernallee', 'bayernallee-w.html'),
    ('eups', 'eupenerstrasse-w.html'),
    ('goethe', 'goethestrasse-w.html'),
    ('vita', 'vita-w.html'),
    ('zeltmensa', 'forum-w.html'),
    ('juelich', 'juelich-w.html'),
)

# All canteens share parse_url() as handler and the same URL prefix.
# NOTE(review): presumably Parser joins shared_prefix and suffix into the
# page URL handed to the handler - confirm in utils.Parser.
parser = Parser(
    'aachen',
    handler=parse_url,
    shared_prefix='http://www.studentenwerk-aachen.de/speiseplaene/',
)
for _canteen_name, _page_suffix in _CANTEEN_PAGES:
    parser.define(_canteen_name, suffix=_page_suffix)