File size: 2,419 Bytes
694e4f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Retrieve data from TMDB using API

import requests as req
import pandas as pd
from conf import api_key

# Single session object used for every HTTP request made by this script.
ses = req.session()

# TMDB endpoint that lists the movie genres; each entry carries an 'id' and a 'name'.
genre_api = 'https://api.themoviedb.org/3/genre/movie/list?api_key=' + api_key + '&language=en-US&page=1'

g = ses.get(genre_api)
genres = g.json()

# Parallel lists: g_name[n] is the display name of the genre whose id is g_id[n].
g_id = [entry['id'] for entry in genres['genres']]
g_name = [entry['name'] for entry in genres['genres']]

# Column accumulators for the output table. Every accepted movie appends
# exactly one value to each list, so index n always describes the same movie.
ids = []
titles = []
desc = []
pop = []
rate = []
date = []
resp = []
genre = []
rev = []

# Seconds to wait on any single HTTP request before giving up, so one stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 30

# Genre id -> genre name. setdefault keeps the first name seen for an id,
# which is the same entry g_name[g_id.index(...)] would select.
genre_names = {}
for _gid, _gname in zip(g_id, g_name):
    genre_names.setdefault(_gid, _gname)

for k in range(1,500): # pages 1..499; author's note: returns 7387 movies as data
    api = 'https://api.themoviedb.org/3/movie/popular?api_key=' + api_key + '&language=en-US&page='+str(k)
    try:
        data = ses.get(api, timeout=REQUEST_TIMEOUT)
        data.raise_for_status()
        j = data.json()
        results = j['results']
    except (req.RequestException, ValueError, KeyError, TypeError) as err:
        # Skip a failed or malformed page instead of aborting: the CSV is only
        # written after the loop, so a crash here would discard everything
        # collected so far. Only the exception type is printed because the
        # message can contain the request URL, which embeds the API key.
        print('Skipping page ' + str(k) + ': ' + type(err).__name__)
        continue

    for i in results:
        # Only English-language originals are kept.
        if i.get('original_language') != 'en':
            continue

        # Missing fields are recorded as None so the columns stay aligned.
        ids.append(i.get('id'))
        titles.append(i.get('title'))
        desc.append(i.get('overview'))
        pop.append(i.get('popularity'))
        rate.append(i.get('vote_average'))
        date.append(i.get('release_date'))
        resp.append(i.get('vote_count'))

        # Translate genre ids to names, silently dropping ids that are not in
        # the genre list; a missing or non-iterable 'genre_ids' yields None.
        try:
            names = [genre_names[gid] for gid in i.get('genre_ids') if gid in genre_names]
        except TypeError:
            names = None
        genre.append(names)

        # Revenue is not part of the listing payload, so it takes one extra
        # request per movie. Any network, decoding, or schema problem is
        # recorded as None; KeyboardInterrupt is deliberately NOT caught so the
        # crawl can still be stopped with Ctrl-C.
        try:
            rev_api = 'https://api.themoviedb.org/3/movie/' + str(i['id']) + '?api_key=' + api_key + '&language=en-US'
            revenue = ses.get(rev_api, timeout=REQUEST_TIMEOUT).json()['revenue']
        except (req.RequestException, ValueError, KeyError, TypeError):
            revenue = None
        rev.append(revenue)

df = pd.DataFrame({'Id':ids, 'Title':titles,'Description':desc,'Popularity':pop,'Genres':genre,'Release Date': date,'Rating':rate,'Vote Count':resp,'Revenue':rev})
df.to_csv('movies2.csv')