Skip to content

Commit

Permalink
Initial check in.
Browse files Browse the repository at this point in the history
  • Loading branch information
sholiday committed Sep 16, 2011
0 parents commit 85b1baf
Show file tree
Hide file tree
Showing 4 changed files with 418 additions and 0 deletions.
36 changes: 36 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Initially from http://help.github.com/ignore-files/
# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
*.pyc

# Packages #
############
# It's better to unpack these files and commit the raw source;
# git has its own built-in compression methods.
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite

# OS generated files #
######################
.DS_Store?
ehthumbs.db
Icon?
Thumbs.db
136 changes: 136 additions & 0 deletions d2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/usr/bin/env python
# encoding: utf-8
"""
d2d.py
Created by Stephen Holiday on 2011-09-15.
Copyright (c) 2011 Stephen Holiday. All rights reserved.
# desire2download #
[Stephen Holiday](http://stephenholiday.com)
d2d is a tool to download all of the content from the University of Waterloo's
new learning management system which uses Desire2Learn instead of the old Angel
based UWACE.
d2d was inspired by Jamie Wong's fabulous [UWAngel-CLI](https://github.com/phleet/UWAngel-CLI)
written in Ruby.
d2d is somewhat hacky and has not been tested extensively. If you do find a bug,
please [let me know](mailto:[email protected])
## Usage ##
Using d2d is easy:
./d2d.py --username scholida
Password:
Logging In...
Logged In
Finding courses...
ECE 224 - Fall 2011
+ ECE 224 - Fall 2011/Labs/Lab Tools Tutorial.html (1.70K)
+ ECE 224 - Fall 2011/Labs/Lab 1/lab1_checklist-s2010.pdf (107.65K)
...
d2d will not download a file if it has already been saved.
"""

import getopt

from desire2download import Desire2Download
from getpass import getpass
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

# Full help text for the command-line tool. main() raises Usage(help_message)
# when -h/--help is given, so this text is what the user sees for those options.
help_message = '''
Desire2Download
===============
Download all of the content from the University of Waterloo's
new learning management system which uses Desire2Learn instead of the old Angel
based UWACE.
d2d was inspired by Jamie Wong's fabulous [UWAngel-CLI](https://github.com/phleet/UWAngel-CLI)
written in Ruby.
d2d is somewhat hacky and has not been tested extensively. If you do find a bug,
please [let me know](mailto:[email protected])
Using d2d is easy:
./d2d.py --username scholida
Password:
Logging In...
Logged In
Finding courses...
ECE 224 - Fall 2011
+ ECE 224 - Fall 2011/Labs/Lab Tools Tutorial.html (1.70K)
+ ECE 224 - Fall 2011/Labs/Lab 1/lab1_checklist-s2010.pdf (107.65K)
...
d2d will not download a file if it has been already saved.
Other Options:
-h This help message
-u, --username [username] set your username
-p, --password [password] set your password
'''

class Usage(Exception):
    """Raised for command-line usage problems (bad options or a help request).

    Attributes:
        msg: The message (or underlying getopt error) to show the user.
    """

    def __init__(self, msg):
        # Forward the message to Exception so str(exc) and exc.args carry it
        # too, instead of being empty.
        Exception.__init__(self, msg)
        self.msg = msg


def main(argv=None):
    """Parse the command line, log in, and download every course's documents.

    Args:
        argv: Full argument list with the program name at index 0.
            Defaults to sys.argv.

    Returns:
        2 when a usage problem (or a help request) was reported on stderr;
        otherwise None once every course link has been processed.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            # "u:" and "p:" each take a value. The previous spec "hup:v"
            # declared -u as a flag, so "-u NAME" never set the username.
            opts, _ = getopt.getopt(
                argv[1:], "hu:p:v", ["help", "username=", "password="])
        except getopt.error as msg:
            raise Usage(msg)

        username = None
        password = None

        # option processing (-v is accepted but currently has no effect)
        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(help_message)
            if option in ("-u", "--username"):
                username = value
            if option in ("-p", "--password"):
                password = value

        # Prompt interactively for whatever was not given on the command line.
        if username is None:
            username = raw_input('Username: ')
        if password is None:
            password = getpass()

        # Start the actual work
        d2d = Desire2Download(username, password)

        d2d.login()
        for link in d2d.get_course_links():
            print(link.text)
            document_tree = d2d.get_course_documents(link)
            d2d.download_tree(document_tree, [link.text])

    except Usage as err:
        program = sys.argv[0].split("/")[-1]
        sys.stderr.write(program + ": " + str(err.msg) + "\n")
        sys.stderr.write("\t for help use --help\n")
        return 2


# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
218 changes: 218 additions & 0 deletions desire2download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#!/usr/bin/env python
# encoding: utf-8
"""
desire2download.py
Created by Stephen Holiday on 2011-09-15.
Copyright (c) 2011 Stephen Holiday. All rights reserved.
"""

import re
import os
import urlparse
from urllib import urlencode
import mechanize
import BeautifulSoup

import sys
reload(sys)
sys.setdefaultencoding("utf-8")

class Desire2Download(object):
    """Client for the University of Waterloo Desire2Learn site.

    Logs in through the CAS login form, finds the course links on the landing
    page, builds a tree of each course's content documents, and saves the
    referenced files to disk under a directory per course/heading.
    """

    base_url = 'https://learn.uwaterloo.ca/d2l/lp/homepage/home.d2l?ou=6606'
    cas_login = 'https://cas.uwaterloo.ca/cas/login?service=http%3a%2f%2flearn.uwaterloo.ca%2fd2l%2forgtools%2fCAS%2fDefault.aspx'

    def __init__(self, username, password):
        """Store the credentials and set up the mechanize browser session.

        Args:
            username: Login name submitted to the CAS form.
            password: Password submitted to the CAS form.
        """
        self.username = username
        self.password = password

        self.br = mechanize.Browser(factory=mechanize.RobustFactory())
        self.br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
        self.br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    def safe_unicode(self, obj):
        """Return obj as a byte string.

        Uses str(obj) when that succeeds; if it raises UnicodeEncodeError,
        returns unicode(obj) encoded as UTF-8 (Python 2 only).
        """
        try:
            return str(obj)
        except UnicodeEncodeError:
            # obj is unicode
            return unicode(obj).encode('utf-8')

    def login(self):
        """Open the CAS login page and submit the first form with the credentials.

        The outcome of the submission is not inspected; 'Logged In' is printed
        once the response has been read.
        """
        print('Logging In...')

        self.br.open(self.cas_login)

        self.br.select_form(nr=0)
        self.br['username'] = self.username
        self.br['password'] = self.password
        # Read the response so the browser finishes loading the landing page.
        self.br.submit().read()
        print('Logged In')

    def get_course_links(self):
        """Return the links on the current page whose text looks like a course.

        A course link's text must start with e.g. 'ECE 224 - Fall 2011'.
        """
        print('Finding courses...')
        course_title = re.compile(r'[A-Z]+ [0-9]{3} - [A-Z][a-z]+ 20[0-9]{2}')
        # Links without any text cannot be courses; skipping them also avoids
        # handing a non-string to the regex.
        return [link for link in self.br.links()
                if link.text and course_title.match(link.text) is not None]

    def _nice_regex(self, regex, content, group):
        """Search content for regex and return the requested group.

        Returns:
            The matched group, or '' when the pattern is not found.
        """
        res = re.search(regex, content)
        if res is None:
            return ''
        return res.group(group)

    def convert_bytes(self, bytes):
        """Format a byte count as a short human-readable size.

        Args:
            bytes: The size in bytes; anything float() accepts.

        Returns:
            The size with two decimals and a unit suffix (T, G, M, K or b),
            using powers of 1024, e.g. '1.50K'.
        """
        size = float(bytes)
        units = (
            (1099511627776, 'T'),
            (1073741824, 'G'),
            (1048576, 'M'),
            (1024, 'K'),
        )
        for threshold, suffix in units:
            if size >= threshold:
                return '%.2f%s' % (size / threshold, suffix)
        return '%.2fb' % size

    def get_course_documents(self, link):
        """Build the document tree for one course.

        Follows the course link, then the first 'Content' link, then the first
        link whose URL matches 'print_download.d2l', and parses the table with
        id 'z_n' on that page.

        Args:
            link: The mechanize link of the course (from get_course_links).

        Returns:
            A nested dict: heading titles map to sub-dicts and document titles
            map to their preview URL. Empty when the page has no such table.

        Raises:
            mechanize.LinkNotFoundError: If the 'Content' link or the
                print/download link cannot be found.
        """
        self.br.follow_link(link)

        content_link = None
        for candidate in self.br.links():
            if candidate.text == 'Content':
                content_link = candidate
                break
        if content_link is None:
            # follow_link(None) would silently follow an unrelated link.
            raise mechanize.LinkNotFoundError('No "Content" link on the course page')
        self.br.follow_link(content_link)

        # Take the first matching link. The previous condition was inverted
        # ("is not None"), so the link was never actually selected.
        print_dl_link = None
        for candidate in self.br.links(url_regex='print_download.d2l'):
            print_dl_link = candidate
            break
        if print_dl_link is None:
            raise mechanize.LinkNotFoundError('No print/download link on the content page')

        page = self.br.follow_link(print_dl_link).read()
        soup = BeautifulSoup.BeautifulSoup(page)
        table = soup.find(id='z_n')
        if table is None:
            return {}
        return self._parse_content_table(table)

    def _parse_content_table(self, table):
        """Turn the rows of the content table into a nested dict of documents."""
        document_tree = {}
        # Headings from the root down to the current nesting level.
        path_to_root = []

        # The first row is skipped.
        for row in table.findAll('tr')[1:]:
            columns = row.findAll('td')

            # Nesting level is derived from the number of cells in the row.
            depth = len(columns) - 2

            cell = None
            for column in columns:
                if column.has_key('class') and column['class'] == 'd_gn':
                    cell = column
            if cell is None:
                # Row without a title cell: nothing to record.
                continue

            cell_str = ''.join(piece.__str__() for piece in cell.contents)

            if 'href=' in cell_str:
                # A cell containing a link is a document.
                anchor = cell.a
                # Attribute access on a tag yields None when there is no such
                # child, so test the value rather than using hasattr().
                image = getattr(anchor, 'img', None)
                if image is not None:
                    image.extract()

                title = ''.join(piece.__str__() for piece in anchor.contents)

                ou = self._nice_regex(r'\?ou\=([0-9]+)', anchor['href'], 1)
                t_id = self._nice_regex(r'\&tId\=([0-9]+)', anchor['href'], 1)

                link_href = 'https://learn.uwaterloo.ca/d2l/lms/content/preview.d2l?tId=%s&ou=%s' % (t_id, ou)

                # Walk (creating as needed) down to the current heading.
                tree_node = document_tree
                for ancestor in path_to_root:
                    tree_node = tree_node.setdefault(ancestor['title'], {})

                tree_node[title] = link_href
            else:
                # Anything else is a heading; strip its markup.
                # NOTE(review): the entity is spelled out as '&nbsp;' here;
                # presumably D2L pads heading cells with it. A plain space
                # would remove the spaces inside titles such as 'Lab 1'.
                heading = cell_str.replace('&nbsp;', '').strip()
                heading = heading.replace('<strong>', '').replace('</strong>', '').strip()
                node = {'heading': True, 'title': heading}

                # Drop headings at this depth or deeper, then descend into
                # the new one.
                path_to_root = path_to_root[:depth]
                path_to_root.append(node)

        return document_tree

    def download_tree(self, root, _path=None):
        """Recursively download every document in a tree to disk.

        Args:
            root: Nested dict as returned by get_course_documents.
            _path: List of directory names leading to root; defaults to empty.
        """
        base_path = [] if _path is None else list(_path)

        for title, node in root.items():
            if isinstance(node, dict):
                self.download_tree(node, base_path + [title])
            else:
                self._download_file(title, node, base_path)

    def _download_file(self, title, url, path_parts):
        """Fetch one document's preview page and save the file it embeds."""
        # NOTE(review): replacing '/' with '\/' still leaves a '/' in the
        # name, so such titles produce an extra directory level. Kept as-is
        # to preserve the existing on-disk layout.
        directory = '/'.join(part.replace('/', '\\/') for part in path_parts)

        if directory and not os.path.isdir(directory):
            os.makedirs(directory)

        page = self.br.open(url).read()
        soup = BeautifulSoup.BeautifulSoup(page)
        iframe = soup.find('iframe')
        source = iframe.get('src') if iframe is not None else None
        if not source:
            print(' ! %s (No embedded file found)' % os.path.join(directory, title))
            return

        # Drop the query string before working out host and file name.
        source_path = source.split('?')[0]
        split = urlparse.urlsplit(source_path)
        if split.netloc == '':
            file_url = 'https://learn.uwaterloo.ca%s' % source_path
            url_path = source_path
        else:
            file_url = source_path
            url_path = split.path

        clean_url = file_url.replace(' ', '%20')

        if 'https://learn.uwaterloo.ca/d2l/common/dialogs/' in file_url:
            # URLs under the dialogs path are never downloaded.
            return

        file_name = os.path.split(url_path)[1]
        path_and_filename = os.path.join(directory, file_name.strip('/'))

        if os.path.isfile(path_and_filename):
            print(' - %s (Already Saved)' % path_and_filename)
            return

        content = self.br.open(clean_url).read()

        # Binary mode: the payload is written exactly as received.
        with open(path_and_filename, 'wb') as output:
            output.write(content)

        print(' + %s (%s)' % (path_and_filename, self.convert_bytes(len(content))))
Loading

0 comments on commit 85b1baf

Please sign in to comment.