forked from sholiday/desire2download
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 85b1baf
Showing
4 changed files
with
418 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Initially from http://help.github.com/ignore-files/ | ||
# Compiled source # | ||
################### | ||
*.com | ||
*.class | ||
*.dll | ||
*.exe | ||
*.o | ||
*.so | ||
*.pyc | ||
|
||
# Packages # | ||
############ | ||
# it's better to unpack these files and commit the raw source | ||
# git has its own built in compression methods | ||
*.7z | ||
*.dmg | ||
*.gz | ||
*.iso | ||
*.jar | ||
*.rar | ||
*.tar | ||
*.zip | ||
|
||
# Logs and databases # | ||
###################### | ||
*.log | ||
*.sql | ||
*.sqlite | ||
|
||
# OS generated files # | ||
###################### | ||
.DS_Store? | ||
ehthumbs.db | ||
Icon? | ||
Thumbs.db |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#!/usr/bin/env python | ||
# encoding: utf-8 | ||
""" | ||
d2d.py | ||
Created by Stephen Holiday on 2011-09-15. | ||
Copyright (c) 2011 Stephen Holiday. All rights reserved. | ||
# desire2download # | ||
[Stephen Holiday](http://stephenholiday.com) | ||
d2d is a tool to download all of the content from the University of Waterloo's | ||
new learning management system which uses Desire2Learn instead of the old Angel | ||
based UWACE. | ||
d2d was inspired by Jamie Wong's fabulous [UWAngel-CLI](https://github.com/phleet/UWAngel-CLI) | ||
written in Ruby. | ||
d2d is somewhat hacky and has not been tested extensively. If you do find a bug, | ||
please [let me know](mailto:[email protected]) | ||
## Usage ## | ||
Using d2d is easy: | ||
./d2d.py --username scholida | ||
Password: | ||
Logging In... | ||
Logged In | ||
Finding courses... | ||
ECE 224 - Fall 2011 | ||
+ ECE 224 - Fall 2011/Labs/Lab Tools Tutorial.html (1.70K) | ||
+ ECE 224 - Fall 2011/Labs/Lab 1/lab1_checklist-s2010.pdf (107.65K) | ||
... | ||
d2d will not download a file if it has already been saved. | ||
""" | ||
|
||
import getopt | ||
|
||
from desire2download import Desire2Download | ||
from getpass import getpass | ||
import sys | ||
reload(sys) | ||
sys.setdefaultencoding("utf-8") | ||
|
||
help_message = ''' | ||
Desire2Download | ||
=============== | ||
Download all of the content from the University of Waterloo's | ||
new learning management system which uses Desire2Learn instead of the old Angel | ||
based UWACE. | ||
d2d was inspired by Jamie Wong's fabulous [UWAngel-CLI](https://github.com/phleet/UWAngel-CLI) | ||
written in Ruby. | ||
d2d is somewhat hacky and has not been tested extensively. If you do find a bug, | ||
please [let me know](mailto:[email protected]) | ||
Using d2d is easy: | ||
./d2d.py --username scholida | ||
Password: | ||
Logging In... | ||
Logged In | ||
Finding courses... | ||
ECE 224 - Fall 2011 | ||
+ ECE 224 - Fall 2011/Labs/Lab Tools Tutorial.html (1.70K) | ||
+ ECE 224 - Fall 2011/Labs/Lab 1/lab1_checklist-s2010.pdf (107.65K) | ||
... | ||
d2d will not download a file if it has been already saved. | ||
Other Options: | ||
-h This help message | ||
-u, --username [username] set your username | ||
-p, --password [password] set your password | ||
''' | ||
|
||
class Usage(Exception):
    """Signal a command-line usage problem; the text to show is kept in ``msg``."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # main() reads this attribute when reporting the error.
        self.msg = msg
|
||
|
||
def main(argv=None):
    """Command-line entry point: log in and download every course's content.

    Parses ``argv`` (``sys.argv`` when omitted) for ``-h/--help``,
    ``-u/--username`` and ``-p/--password``.  A missing username or password
    is prompted for interactively.  Each course link found after login is
    printed, its document tree is fetched and the tree is downloaded into a
    directory named after the course.

    Returns 0 on success, or 2 after printing a message to stderr when the
    arguments are invalid or help was requested.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            # "u:" and "p:" both take a value; the previous spec ("hup:v")
            # declared -u as a flag, so "-u name" silently lost the username.
            opts, args = getopt.getopt(
                argv[1:], "hu:p:v", ["help", "username=", "password="])
        except getopt.error as msg:
            raise Usage(msg)

        username = None
        password = None

        # option processing ("-v" is accepted but currently has no effect)
        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(help_message)
            elif option in ("-u", "--username"):
                username = value
            elif option in ("-p", "--password"):
                password = value

        if username is None:
            username = raw_input('Username: ')
        if password is None:
            password = getpass()

        # Start the actual work
        d2d = Desire2Download(username, password)
        d2d.login()
        for link in d2d.get_course_links():
            print(link.text)
            document_tree = d2d.get_course_documents(link)
            d2d.download_tree(document_tree, [link.text])

    except Usage as err:
        program = sys.argv[0].split("/")[-1]
        sys.stderr.write(program + ": " + str(err.msg) + "\n")
        sys.stderr.write("\t for help use --help\n")
        return 2

    return 0
|
||
|
||
# Run the command-line entry point only when executed as a script, and hand
# its return value to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
#!/usr/bin/env python | ||
# encoding: utf-8 | ||
""" | ||
desire2download.py | ||
Created by Stephen Holiday on 2011-09-15. | ||
Copyright (c) 2011 Stephen Holiday. All rights reserved. | ||
""" | ||
|
||
import re | ||
import os | ||
import urlparse | ||
from urllib import urlencode | ||
import mechanize | ||
import BeautifulSoup | ||
|
||
import sys | ||
reload(sys) | ||
sys.setdefaultencoding("utf-8") | ||
|
||
class Desire2Download(object):
    """Log in to Waterloo's Desire2Learn site and mirror course content.

    Typical use: construct with credentials, call ``login()``, then for each
    link from ``get_course_links()`` call ``get_course_documents(link)`` and
    pass the resulting tree to ``download_tree()``.
    """

    base_url = 'https://learn.uwaterloo.ca/d2l/lp/homepage/home.d2l?ou=6606'
    cas_login = 'https://cas.uwaterloo.ca/cas/login?service=http%3a%2f%2flearn.uwaterloo.ca%2fd2l%2forgtools%2fCAS%2fDefault.aspx'
    # Scheme + host prepended to relative URLs found in content pages.
    site_root = 'https://learn.uwaterloo.ca'

    def __init__(self, username, password):
        """Store the credentials and set up the mechanize browser session."""
        self.username = username
        self.password = password

        self.br = mechanize.Browser(factory=mechanize.RobustFactory())
        self.br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
        self.br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    def safe_unicode(self, obj):
        """Return ``str(obj)``, falling back to a UTF-8 encoded byte string
        when ``str()`` raises ``UnicodeEncodeError``."""
        try:
            return str(obj)
        except UnicodeEncodeError:
            # obj is unicode
            return unicode(obj).encode('utf-8')

    def login(self):
        """Open the CAS login page and submit its first form with the
        stored username and password.

        NOTE(review): the response is not inspected, so 'Logged In' is
        printed even if the credentials were rejected.
        """
        print('Logging In...')

        self.br.open(self.cas_login)

        self.br.select_form(nr=0)
        self.br['username'] = self.username
        self.br['password'] = self.password
        # Read the response so the post-login page is fully loaded.
        self.br.submit().read()
        print('Logged In')

    def get_course_links(self):
        """Return the links on the current page whose text starts with a
        course label such as ``ECE 224 - Fall 2011``."""
        print('Finding courses...')
        pattern = r'[A-Z]+ [0-9]{3} - [A-Z][a-z]+ 20[0-9]{2}'
        # "or ''" guards against links that carry no text at all.
        return [link for link in self.br.links()
                if re.match(pattern, link.text or '') is not None]

    def _nice_regex(self, regex, content, group):
        """Return ``group`` of the first ``regex`` match in ``content``,
        or ``''`` when there is no match."""
        res = re.search(regex, content)
        if res is not None:
            return res.group(group)
        return ''

    def convert_bytes(self, bytes):
        '''
        Format a byte count as a short human-readable string with two
        decimals and a unit suffix (b, K, M, G or T; powers of 1024).

        Stolen from http://www.5dollarwhitebox.org/drupal/node/84
        '''
        size = float(bytes)
        units = (
            (1099511627776, 'T'),
            (1073741824, 'G'),
            (1048576, 'M'),
            (1024, 'K'),
        )
        for threshold, suffix in units:
            if size >= threshold:
                return '%.2f%s' % (size / threshold, suffix)
        return '%.2fb' % size

    def get_course_documents(self, link):
        """Build the document tree for the course behind ``link``.

        Follows the course link, then its first 'Content' link, then the
        first link whose URL matches ``print_download.d2l``, and parses the
        table with id ``z_n`` on that page.

        Returns a nested dict: heading titles map to sub-dicts and document
        titles map to their preview URL.  An empty dict is returned (after
        printing a notice) when any of the expected links or the table is
        missing.
        """
        self.br.follow_link(link)

        # Only the first 'Content' link is used.
        content_link = None
        for candidate in self.br.links():
            if candidate.text == 'Content':
                content_link = candidate
                break
        if content_link is None:
            print(' ! No Content link found, skipping course')
            return {}
        self.br.follow_link(content_link)

        # Take the first matching link.  The previous check was inverted
        # ("is not None"), so no link was ever selected here.
        print_dl_link = None
        for candidate in self.br.links(url_regex='print_download.d2l'):
            print_dl_link = candidate
            break
        if print_dl_link is None:
            print(' ! No print/download link found, skipping course')
            return {}

        page = self.br.follow_link(print_dl_link).read()
        soup = BeautifulSoup.BeautifulSoup(page)
        table = soup.find(id='z_n')
        if table is None:
            print(' ! No content table found, skipping course')
            return {}

        document_tree = {}
        # Heading nodes from the top level down to the current position.
        path_to_root = []

        # The first row is skipped (presumably a header row).
        for row in table.findAll('tr')[1:]:
            columns = row.findAll('td')

            # Nesting depth is derived from the number of cells in the row.
            depth = len(columns) - 2

            # The last cell with class "d_gn" holds the heading or link.
            cell = None
            for column in columns:
                # has_key() is the BeautifulSoup 3 Tag attribute test.
                if column.has_key('class') and column['class'] == 'd_gn':
                    cell = column
            if cell is None:
                continue

            cell_str = ''.join(str(piece) for piece in cell.contents)

            if re.search('href=', cell_str):
                # A document row: record its preview URL under the
                # current chain of headings.
                anchor = cell.a
                if anchor is None:
                    continue

                # Tag attribute access returns None when no <img> exists,
                # so hasattr() cannot be used to test for it.
                image = anchor.img
                if image is not None:
                    image.extract()

                title = ''.join(str(piece) for piece in anchor.contents)

                ou = self._nice_regex(r'\?ou\=([0-9]+)', anchor['href'], 1)
                tId = self._nice_regex(r'\&tId\=([0-9]+)', anchor['href'], 1)

                link_href = '%s/d2l/lms/content/preview.d2l?tId=%s&ou=%s' % (
                    self.site_root, tId, ou)

                cur_tree_node = document_tree
                for cur_path_node in path_to_root:
                    cur_tree_node = cur_tree_node.setdefault(
                        cur_path_node['title'], {})

                cur_tree_node[title] = link_href
            else:
                # A heading row: strip the &nbsp; padding and <strong>
                # markup, keeping ordinary spaces inside the title.
                heading = cell_str.replace('&nbsp;', '').strip()
                heading = heading.replace('<strong>', '').replace('</strong>', '').strip()
                node = {'heading': True, 'title': heading}

                # Drop headings at this depth or deeper, then descend.
                path_to_root = path_to_root[:depth]
                path_to_root.append(node)

        return document_tree

    def download_tree(self, root, _path=None):
        """Recursively download every document in ``root``.

        ``root`` is a tree as returned by ``get_course_documents``;
        ``_path`` is the list of directory names leading to it.  Files are
        written to ``<path joined by '/'>/<file name from the URL>`` and
        skipped when that file already exists.
        """
        base_path = [] if _path is None else list(_path)

        for name, node in root.items():
            if isinstance(node, dict):
                self.download_tree(node, base_path + [name])
                continue

            # NOTE(review): replacing '/' with '\/' still leaves a '/' in
            # the part, so such names create an extra directory level.
            # Kept as-is so previously saved files are still recognised.
            directory = '/'.join(part.replace('/', '\\/') for part in base_path)

            if directory and not os.path.isdir(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Only tolerate "already exists"; re-raise real failures.
                    if not os.path.isdir(directory):
                        raise

            # The preview page embeds the actual file in an iframe.
            page = self.br.open(node).read()
            soup = BeautifulSoup.BeautifulSoup(page)
            iframe = soup.find('iframe')
            source = iframe.get('src') if iframe is not None else None
            if not source:
                print(' ! %s (No downloadable content found)' % name)
                continue

            url_path = source.split('?')[0]
            parts = urlparse.urlsplit(url_path)
            if parts.netloc == '':
                # Relative URL: resolve against the site root.
                file_url = '%s%s' % (self.site_root, url_path)
            else:
                file_url = url_path
                url_path = parts.path

            if (self.site_root + '/d2l/common/dialogs/') in file_url:
                # Dialog pages are not course files.
                continue

            file_name = os.path.split(url_path)[1].strip('/')
            if not file_name:
                print(' ! %s (No file name in %s)' % (name, file_url))
                continue

            if directory:
                path_and_filename = '%s/%s' % (directory, file_name)
            else:
                path_and_filename = file_name

            if os.path.isfile(path_and_filename):
                print(' - %s (Already Saved)' % path_and_filename)
                continue

            clean_url = file_url.replace(' ', '%20')
            content = self.br.open(clean_url).read()

            # Binary mode: downloads include PDFs and other non-text files.
            with open(path_and_filename, 'wb') as f:
                f.write(content)

            print(' + %s (%s)' % (path_and_filename, self.convert_bytes(len(content))))
Oops, something went wrong.