-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseed.py
104 lines (68 loc) · 2.57 KB
/
seed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""Utility file to seed ratings database from MovieLens data in seed_data/"""
from sqlalchemy import func
from model import User
from model import Rating
from model import Movie
from datetime import datetime
from model import connect_to_db, db
from server import app
def load_users():
"""Load users from u.user into database."""
print "Users"
# Delete all rows in table, so if we need to run this a second time,
# we won't be trying to add duplicate users
User.query.delete()
# Read u.user file and insert data
for row in open("seed_data/u.user"):
row = row.rstrip()
user_id, age, gender, occupation, zipcode = row.split("|")
user = User(user_id=user_id, age=age, zipcode=zipcode)
# We need to add to the session or it won't ever be stored
db.session.add(user)
# Once we're done, we should commit our work
db.session.commit()
def load_movies():
"""Load movies from u.item into database."""
print "Movies"
Movie.query.delete()
for row in open("seed_data/u.item"):
row = row.rstrip()
movie_id, title, released_at, video_release_date, imdb_url = row.split("|")[:5]
if released_at:
released_at = datetime.strptime(released_at, "%d-%b-%Y")
else:
released_at = None
# release_year = title(-6:-1)
title = title[:-7]
title = title.decode("latin-1")
movie = Movie(movie_id=movie_id, title=title, released_at=released_at, imdb_url=imdb_url)
db.session.add(movie)
db.session.commit()
def load_ratings():
"""Load ratings from u.data into database."""
print "Ratings"
Rating.query.delete()
for row in open("seed_data/u.data"):
row = row.rstrip()
user_id, movie_id, score, timestamp = row.split("\t")
rating = Rating(movie_id=movie_id, user_id=user_id, score=score)
db.session.add(rating)
db.session.commit()
def set_val_user_id():
"""Set value for the next user_id after seeding database"""
# Get the Max user_id in the database
result = db.session.query(func.max(User.user_id)).one()
max_id = int(result[0])
# Set the value for the next user_id to be max_id + 1
query = "SELECT setval('users_user_id_seq', :new_id)"
db.session.execute(query, {'new_id': max_id + 1})
db.session.commit()
if __name__ == "__main__":
connect_to_db(app)
# In case tables haven't been created, create them
db.create_all()
# Import different types of data
load_users()
load_movies()
load_ratings()
set_val_user_id()