-
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathimdb.ts
210 lines (179 loc) · 5.32 KB
/
imdb.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import fs from 'node:fs/promises'
import util from 'node:util'
import * as movier from 'movier'
import { parse as parseCSV } from 'csv-parse'
import { Level } from 'level'
import pThrottle from 'p-throttle'
import * as types from '../types'
import * as config from './config'
/**
 * Shared rate limiter for outbound IMDB lookups.
 *
 * Configured for a limit of 10 calls per 2000ms interval. Keep in mind that a
 * single `movier.getTitleDetailsByIMDBId` call issues several HTTP GET
 * requests, so the effective request rate against IMDB is higher than the
 * call rate configured here.
 */
const throttle = pThrottle({ limit: 10, interval: 2000 })
/**
 * Rate-limited lookup of a single IMDB title through `movier`.
 *
 * Only the fields enabled in `select` below are requested. The remaining
 * fields that were considered but deliberately left out are: directors,
 * writers, producers, casts, posterImage, allImages, goofs, quotes, taglines,
 * productionCompanies, awards, awardsSummary, dates and allReleaseDates.
 *
 * @param titleId - IMDB title identifier passed straight through to `movier`.
 * @returns Whatever `movier.getTitleDetailsByIMDBId` resolves to, scheduled
 * through the shared `throttle` limiter.
 */
export const getTitleDetailsByIMDBId = throttle((titleId: string) => {
  return movier.getTitleDetailsByIMDBId(titleId, {
    select: {
      detailsLang: true,
      name: true,
      genres: true,
      mainType: true,
      plot: true,
      keywords: true,
      countriesOfOrigin: true,
      languages: true,
      ageCategoryTitle: true,
      boxOffice: true,
      mainRate: true,
      allRates: true,
      runtime: true
    }
  })
})
/**
 * Opens the Level database located at `config.imdbMoviesDbPath`.
 *
 * Keys are strings and values are stored with JSON encoding, typed as
 * `types.imdb.Movie`. The handle is returned already opened; this function
 * never closes it.
 *
 * @returns The opened Level database handle.
 */
export async function loadIMDBMoviesDB() {
  const moviesDb = new Level<string, types.imdb.Movie>(
    config.imdbMoviesDbPath,
    { valueEncoding: 'json' }
  )

  await moviesDb.open()
  return moviesDb
}
/**
 * Loads IMDB ratings from the tab-separated data dump at
 * `config.imdbRatingsPath`.
 *
 * Every row is read as `[imdbId, rating, numVotes]`. Rows with an empty id, or
 * whose rating / vote count do not parse to finite numbers, are skipped. This
 * keeps a column-header row or a malformed line from producing a bogus entry
 * full of `NaN` values and from inflating the logged count.
 *
 * Loading is best-effort: if reading or parsing fails, a warning is logged and
 * whatever has been collected up to that point (typically an empty map) is
 * returned instead of throwing.
 *
 * @returns A map from IMDB id to `{ rating, numVotes }`.
 */
export async function loadIMDBRatingsFromDataDump(): Promise<types.IMDBRatings> {
  const imdbRatings: types.IMDBRatings = {}

  try {
    console.log(
      `loading IMDB ratings from data dump (${config.imdbRatingsPath})`
    )

    // csv-parse's callback API is overloaded, so the promisified form is left
    // untyped and the result is annotated at the call site instead.
    const parse: any = util.promisify(parseCSV)
    const rawCSV = await fs.readFile(config.imdbRatingsPath, {
      encoding: 'utf-8'
    })

    const imdbRatingsRaw: Array<Array<string>> = await parse(rawCSV, {
      delimiter: '\t'
    })

    for (const imdbRatingRaw of imdbRatingsRaw) {
      const [imdbId, ratingRaw, numVotesRaw] = imdbRatingRaw
      const rating = Number.parseFloat(ratingRaw)
      const numVotes = Number.parseInt(numVotesRaw, 10)

      // Skip header / malformed rows rather than storing NaN ratings.
      if (!imdbId || !Number.isFinite(rating) || !Number.isFinite(numVotes)) {
        continue
      }

      imdbRatings[imdbId] = {
        rating,
        numVotes
      }
    }

    console.warn(
      `loaded ${Object.keys(imdbRatings).length} IMDB ratings from data dump (${
        config.imdbRatingsPath
      })`
    )
  } catch (err) {
    console.warn(
      `warn: unable to load IMDB ratings from data dump (${config.imdbRatingsPath})`,
      err
    )
  }

  return imdbRatings
}
/**
 * Augments a normalized TMDB movie with additional metadata from IMDB.
 *
 * In most cases, we prefer the IMDB data over TMDB equivalents.
 *
 * The `movie` argument is mutated in place and is also the value returned on
 * success. Movies without an `imdbId` are returned untouched.
 *
 * This function also filters out movies that are unlikely to be relevant: if
 * the merged genre list contains `'short'`, `null` is returned (the passed-in
 * `movie` will already have been mutated by then).
 *
 * @param movie - Movie to enrich; modified in place.
 * @param options.imdbRatings - Optional ratings map (keyed by IMDB id) loaded
 * from the IMDB data dump.
 * @param options.imdbMovie - Optional IMDB title details for this movie.
 * @returns The enriched `movie`, or `null` if it was filtered out.
 */
export function populateMovieWithIMDBInfo(
  movie: types.Movie,
  {
    imdbRatings,
    imdbMovie
  }: { imdbRatings?: types.IMDBRatings; imdbMovie?: types.imdb.Movie }
): types.Movie | null {
  if (!movie.imdbId) {
    return movie
  }

  const imdbRating = imdbRatings ? imdbRatings[movie.imdbId] : null
  let hasIMDBRating = false

  if (imdbMovie) {
    if (imdbMovie.genres?.length) {
      const genres = imdbMovie.genres.map((genre) => genre.toLowerCase())
      movie.genres = movie.genres.concat(genres)

      // ensure genres are unique
      movie.genres = Array.from(new Set(movie.genres))
    }

    if (imdbMovie.keywords?.length) {
      movie.keywords = imdbMovie.keywords
    }

    if (imdbMovie.countriesOfOrigin?.length) {
      movie.countriesOfOrigin = imdbMovie.countriesOfOrigin
    }

    if (imdbMovie.languages?.length) {
      movie.languages = imdbMovie.languages
    }

    if (imdbMovie.ageCategoryTitle) {
      movie.mpaaRating = imdbMovie.ageCategoryTitle
    }

    if (imdbMovie.plot) {
      // One pattern drives both detection and stripping of the "... Read all"
      // truncation marker, so the two can never disagree about letter case or
      // trailing whitespace.
      const truncationMarker = /\.\.\. read all\s*$/i
      const isTruncated =
        imdbMovie.plot.trim().endsWith('Read all') ||
        truncationMarker.test(imdbMovie.plot)

      if (movie.plot && isTruncated) {
        // keep the existing plot rather than replacing it with a truncated one
      } else {
        // otherwise favor the IMDB plot over the TMDB plot
        movie.plot = imdbMovie.plot.replace(truncationMarker, '...')
      }
    }

    if (imdbMovie.boxOffice) {
      if (imdbMovie.boxOffice.budget > 0) {
        movie.budget = `${imdbMovie.boxOffice.budget}`
      }

      if (imdbMovie.boxOffice.worldwide > 0) {
        movie.revenue = `${imdbMovie.boxOffice.worldwide}`
      }
    }

    if (imdbMovie.mainRate?.rateSource?.toLowerCase() === 'imdb') {
      hasIMDBRating = true
      movie.imdbRating = imdbMovie.mainRate.rate
      movie.imdbVotes = imdbMovie.mainRate.votesCount
    }

    const metacriticRate = imdbMovie.allRates?.find(
      (rate) => rate.rateSource?.toLowerCase() === 'metacritics'
    )
    if (metacriticRate) {
      movie.metacriticRating = metacriticRate.rate
      movie.metacriticVotes = metacriticRate.votesCount
    }

    movie.imdbType = imdbMovie.mainType

    const genres = new Set(movie.genres)
    if (genres.has('short')) {
      if (imdbMovie.mainType === 'movie') {
        movie.imdbType = 'short'
      }

      // ignore IMDB-labeled short films
      return null
    }
  }

  if (imdbRating) {
    // if we have IMDB ratings from two sources, take the one with more votes,
    // which is likely to be more recent
    if (!hasIMDBRating || imdbRating.numVotes > movie.imdbVotes) {
      movie.imdbRating = imdbRating.rating
      movie.imdbVotes = imdbRating.numVotes
    }
  }

  return movie
}