-
Notifications
You must be signed in to change notification settings - Fork 53
/
Copy pathdemo1.py
44 lines (35 loc) · 1.44 KB
/
demo1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""
Some examples of fun things that can be done using the current 'API'
"""
from repool_util import loadPubs, openPDFs
def demo1():
    """
    Find, list and open NIPS publications that mention the MNIST dataset.

    You wrote an algorithm and benchmarked it on the MNIST dataset. You are
    wondering how your results compare with those in the literature:

    1. Finds all publications that mention mnist
    2. Prints out their titles
    3. Opens the three latest publications that mention it at least twice

    Pre-requisites:
    - Assumes 'pubs_nips' exists and that pdf text is present.
      This can be obtained by running nips_download_parse.py and then
      nips_add_pdftext.py, or by downloading it from the site
      (https://sites.google.com/site/researchpooler/home)

    Side-effects:
    - will use os call to open a pdf with default program
    """
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print("loading the NIPS publications dataset...")
    pubs = loadPubs('pubs_nips')

    # Keep every paper whose 'pdf_text' entry contains 'mnist'. Papers with
    # no 'pdf_text' entry fall back to an empty dict and are skipped.
    # NOTE(review): 'pdf_text' is presumably a word -> count mapping, since
    # it is indexed by word and compared with a number below; confirm.
    mnist_pubs = [pub for pub in pubs if 'mnist' in pub.get('pdf_text', {})]

    print("titles of papers that mention MNIST dataset:")
    for pub in mnist_pubs:
        print(pub['title'])
    print("total of %d publications mention MNIST." % (len(mnist_pubs),))

    # Keep only the papers that mention mnist more than once, then order
    # them newest first. The tuples sort by year, with the pdf value acting
    # as the tie-breaker between papers from the same year.
    recent = [(pub['year'], pub['pdf']) for pub in mnist_pubs
              if pub['pdf_text']['mnist'] > 1]
    recent.sort(reverse=True)

    # Hand the pdf entries of the three newest matches to openPDFs.
    print("opening the top 3...")
    openPDFs([pdf for _year, pdf in recent[:3]])
# Run the demo only when this file is executed as a script, so that importing
# the module does not trigger loading the dataset or opening any PDFs.
if __name__ == "__main__":
    demo1()