Spaces:
Runtime error
Runtime error
File size: 57,867 Bytes
b4d8eca |
1 |
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2021-06-21T12:32:55.393741Z","iopub.execute_input":"2021-06-21T12:32:55.394360Z","iopub.status.idle":"2021-06-21T12:32:55.415999Z","shell.execute_reply.started":"2021-06-21T12:32:55.394257Z","shell.execute_reply":"2021-06-21T12:32:55.414664Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv\n/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv\n","output_type":"stream"}]},{"cell_type":"code","source":"movies = 
pd.read_csv('/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv')\ncredits = pd.read_csv('/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv') ","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:31.347717Z","iopub.execute_input":"2021-06-21T12:36:31.348105Z","iopub.status.idle":"2021-06-21T12:36:32.003856Z","shell.execute_reply.started":"2021-06-21T12:36:31.348073Z","shell.execute_reply":"2021-06-21T12:36:32.002744Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"movies.head(2)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:33.757346Z","iopub.execute_input":"2021-06-21T12:36:33.757778Z","iopub.status.idle":"2021-06-21T12:36:33.781729Z","shell.execute_reply.started":"2021-06-21T12:36:33.757743Z","shell.execute_reply":"2021-06-21T12:36:33.780738Z"},"trusted":true},"execution_count":31,"outputs":[{"execution_count":31,"output_type":"execute_result","data":{"text/plain":" budget genres \\\n0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n1 300000000 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n\n homepage id \\\n0 http://www.avatarmovie.com/ 19995 \n1 http://disney.go.com/disneypictures/pirates/ 285 \n\n keywords original_language \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... en \n\n original_title \\\n0 Avatar \n1 Pirates of the Caribbean: At World's End \n\n overview popularity \\\n0 In the 22nd century, a paraplegic Marine is di... 150.437577 \n1 Captain Barbossa, long believed to be dead, ha... 139.082615 \n\n production_companies \\\n0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n1 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... \n\n production_countries release_date revenue \\\n0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n1 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 
2007-05-19 961000000 \n\n runtime spoken_languages status \\\n0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n1 169.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n\n tagline \\\n0 Enter the World of Pandora. \n1 At the end of the world, the adventure begins. \n\n title vote_average vote_count \n0 Avatar 7.2 11800 \n1 Pirates of the Caribbean: At World's End 6.9 4500 ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>budget</th>\n <th>genres</th>\n <th>homepage</th>\n <th>id</th>\n <th>keywords</th>\n <th>original_language</th>\n <th>original_title</th>\n <th>overview</th>\n <th>popularity</th>\n <th>production_companies</th>\n <th>production_countries</th>\n <th>release_date</th>\n <th>revenue</th>\n <th>runtime</th>\n <th>spoken_languages</th>\n <th>status</th>\n <th>tagline</th>\n <th>title</th>\n <th>vote_average</th>\n <th>vote_count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>237000000</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>http://www.avatarmovie.com/</td>\n <td>19995</td>\n <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n <td>en</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>150.437577</td>\n <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n <td>2009-12-10</td>\n <td>2787965087</td>\n <td>162.0</td>\n <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n <td>Released</td>\n <td>Enter the World of Pandora.</td>\n <td>Avatar</td>\n <td>7.2</td>\n <td>11800</td>\n </tr>\n <tr>\n <th>1</th>\n <td>300000000</td>\n <td>[{\"id\": 
12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n <td>http://disney.go.com/disneypictures/pirates/</td>\n <td>285</td>\n <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n <td>en</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>139.082615</td>\n <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n <td>2007-05-19</td>\n <td>961000000</td>\n <td>169.0</td>\n <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n <td>Released</td>\n <td>At the end of the world, the adventure begins.</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>6.9</td>\n <td>4500</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies.shape","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:33:06.307921Z","iopub.execute_input":"2021-06-21T12:33:06.308311Z","iopub.status.idle":"2021-06-21T12:33:06.316052Z","shell.execute_reply.started":"2021-06-21T12:33:06.308279Z","shell.execute_reply":"2021-06-21T12:33:06.314903Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"(4803, 20)"},"metadata":{}}]},{"cell_type":"code","source":"credits.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:33:08.787389Z","iopub.execute_input":"2021-06-21T12:33:08.787791Z","iopub.status.idle":"2021-06-21T12:33:08.800864Z","shell.execute_reply.started":"2021-06-21T12:33:08.787758Z","shell.execute_reply":"2021-06-21T12:33:08.799834Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... 
\n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n 
</tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"# Join on the movie id, not the title: titles are not guaranteed unique, and a\n# many-to-many match on 'title' would emit duplicate rows for same-named films.\nmovies = movies.merge(credits.drop(columns='title'), left_on='id', right_on='movie_id')","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:38.686521Z","iopub.execute_input":"2021-06-21T12:36:38.687005Z","iopub.status.idle":"2021-06-21T12:36:38.713343Z","shell.execute_reply.started":"2021-06-21T12:36:38.686963Z","shell.execute_reply":"2021-06-21T12:36:38.711939Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"movies.head()\n# budget\n# homepage\n# id\n# original_language\n# original_title\n# popularity\n# production_companies\n# production_countries\n# release_date (not sure)","metadata":{"execution":{"iopub.status.busy":"2021-06-19T12:48:23.470187Z","iopub.execute_input":"2021-06-19T12:48:23.470754Z","iopub.status.idle":"2021-06-19T12:48:23.5041Z","shell.execute_reply.started":"2021-06-19T12:48:23.470724Z","shell.execute_reply":"2021-06-19T12:48:23.503327Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:41.393326Z","iopub.execute_input":"2021-06-21T12:36:41.393733Z","iopub.status.idle":"2021-06-21T12:36:41.406883Z","shell.execute_reply.started":"2021-06-21T12:36:41.393699Z","shell.execute_reply":"2021-06-21T12:36:41.405822Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"movies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:33:39.516481Z","iopub.execute_input":"2021-06-21T12:33:39.517061Z","iopub.status.idle":"2021-06-21T12:33:39.532499Z","shell.execute_reply.started":"2021-06-21T12:33:39.517012Z","shell.execute_reply":"2021-06-21T12:33:39.531584Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's 
End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n1 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n2 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n3 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam... \n4 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n\n keywords \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n <td>[{\"cast_id\": 2, 
\"character\": \"Bruce Wayne / Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"import ast","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:34:19.382331Z","iopub.execute_input":"2021-06-21T12:34:19.382856Z","iopub.status.idle":"2021-06-21T12:34:19.387416Z","shell.execute_reply.started":"2021-06-21T12:34:19.382822Z","shell.execute_reply":"2021-06-21T12:34:19.386451Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"def convert(text):\n    \"\"\"Parse a stringified list of dicts and return each entry's 'name'.\"\"\"\n    return [entry['name'] for entry in ast.literal_eval(text)]","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:47.539904Z","iopub.execute_input":"2021-06-21T12:36:47.540453Z","iopub.status.idle":"2021-06-21T12:36:47.545014Z","shell.execute_reply.started":"2021-06-21T12:36:47.540418Z","shell.execute_reply":"2021-06-21T12:36:47.544243Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"code","source":"movies.dropna(inplace=True)","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:50.059006Z","iopub.execute_input":"2021-06-21T12:36:50.059538Z","iopub.status.idle":"2021-06-21T12:36:50.073095Z","shell.execute_reply.started":"2021-06-21T12:36:50.059504Z","shell.execute_reply":"2021-06-21T12:36:50.071577Z"},"trusted":true},"execution_count":35,"outputs":[]},{"cell_type":"code","source":"movies['genres'] = 
movies['genres'].apply(convert)\nmovies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:52.343645Z","iopub.execute_input":"2021-06-21T12:36:52.344121Z","iopub.status.idle":"2021-06-21T12:36:52.523910Z","shell.execute_reply.started":"2021-06-21T12:36:52.344082Z","shell.execute_reply":"2021-06-21T12:36:52.522805Z"},"trusted":true},"execution_count":36,"outputs":[{"execution_count":36,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, Science Fiction] \n\n keywords \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... 
\n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, Adventure, Crime]</td>\n <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / 
Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, Science Fiction]</td>\n <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies['keywords'] = movies['keywords'].apply(convert)\nmovies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:55.304070Z","iopub.execute_input":"2021-06-21T12:36:55.304439Z","iopub.status.idle":"2021-06-21T12:36:55.738525Z","shell.execute_reply.started":"2021-06-21T12:36:55.304408Z","shell.execute_reply":"2021-06-21T12:36:55.737123Z"},"trusted":true},"execution_count":37,"outputs":[{"execution_count":37,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, Science Fiction] \n\n keywords \\\n0 [culture clash, future, space war, space colon... \n1 [ocean, drug abuse, exotic island, east india ... \n2 [spy, based on novel, secret agent, sequel, mi... \n3 [dc comics, crime fighter, terrorist, secret i... \n4 [based on novel, mars, medallion, space travel... 
\n\n cast \\\n0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n <td>[culture clash, future, space war, space colon...</td>\n <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[ocean, drug abuse, exotic island, east india ...</td>\n <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, 
Adventure, Crime]</td>\n <td>[spy, based on novel, secret agent, sequel, mi...</td>\n <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[dc comics, crime fighter, terrorist, secret i...</td>\n <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, Science Fiction]</td>\n <td>[based on novel, mars, medallion, space travel...</td>\n <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"import ast\nast.literal_eval('[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]')","metadata":{"execution":{"iopub.status.busy":"2021-06-19T13:07:21.572154Z","iopub.execute_input":"2021-06-19T13:07:21.572473Z","iopub.status.idle":"2021-06-19T13:07:21.578686Z","shell.execute_reply.started":"2021-06-19T13:07:21.572446Z","shell.execute_reply":"2021-06-19T13:07:21.577661Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def convert3(text):\n    \"\"\"Parse a stringified list of dicts and return the 'name' of its first three entries.\"\"\"\n    # Slice first so only three entries are visited instead of walking the whole list.\n    return [entry['name'] for entry in ast.literal_eval(text)[:3]]
","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:36:59.510859Z","iopub.execute_input":"2021-06-21T12:36:59.511226Z","iopub.status.idle":"2021-06-21T12:36:59.517043Z","shell.execute_reply.started":"2021-06-21T12:36:59.511192Z","shell.execute_reply":"2021-06-21T12:36:59.515878Z"},"trusted":true},"execution_count":38,"outputs":[]},{"cell_type":"code","source":"movies['cast'] = movies['cast'].apply(convert)\nmovies.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:00.828661Z","iopub.execute_input":"2021-06-21T12:37:00.829409Z","iopub.status.idle":"2021-06-21T12:37:04.117090Z","shell.execute_reply.started":"2021-06-21T12:37:00.829355Z","shell.execute_reply":"2021-06-21T12:37:04.115822Z"},"trusted":true},"execution_count":39,"outputs":[{"execution_count":39,"output_type":"execute_result","data":{"text/plain":" movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, Science Fiction] \n\n keywords \\\n0 [culture clash, future, space war, space colon... \n1 [ocean, drug abuse, exotic island, east india ... \n2 [spy, based on novel, secret agent, sequel, mi... \n3 [dc comics, crime fighter, terrorist, secret i... \n4 [based on novel, mars, medallion, space travel... \n\n cast \\\n0 [Sam Worthington, Zoe Saldana, Sigourney Weave... \n1 [Johnny Depp, Orlando Bloom, Keira Knightley, ... \n2 [Daniel Craig, Christoph Waltz, Léa Seydoux, R... 
\n3 [Christian Bale, Michael Caine, Gary Oldman, A... \n4 [Taylor Kitsch, Lynn Collins, Samantha Morton,... \n\n crew \n0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n <td>[culture clash, future, space war, space colon...</td>\n <td>[Sam Worthington, Zoe Saldana, Sigourney Weave...</td>\n <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>285</td>\n <td>Pirates of the Caribbean: At World's End</td>\n <td>Captain Barbossa, long believed to be dead, ha...</td>\n <td>[Adventure, Fantasy, Action]</td>\n <td>[ocean, drug abuse, exotic island, east india ...</td>\n <td>[Johnny Depp, Orlando Bloom, Keira Knightley, ...</td>\n <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>206647</td>\n <td>Spectre</td>\n <td>A cryptic message from Bond’s past sends him o...</td>\n <td>[Action, Adventure, Crime]</td>\n <td>[spy, based on novel, secret agent, sequel, mi...</td>\n <td>[Daniel Craig, Christoph Waltz, Léa Seydoux, R...</td>\n <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n </tr>\n 
<tr>\n <th>3</th>\n <td>49026</td>\n <td>The Dark Knight Rises</td>\n <td>Following the death of District Attorney Harve...</td>\n <td>[Action, Crime, Drama, Thriller]</td>\n <td>[dc comics, crime fighter, terrorist, secret i...</td>\n <td>[Christian Bale, Michael Caine, Gary Oldman, A...</td>\n <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>49529</td>\n <td>John Carter</td>\n <td>John Carter is a war-weary, former military ca...</td>\n <td>[Action, Adventure, Science Fiction]</td>\n <td>[based on novel, mars, medallion, space travel...</td>\n <td>[Taylor Kitsch, Lynn Collins, Samantha Morton,...</td>\n <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"movies['cast'] = movies['cast'].apply(lambda x:x[0:3])","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:07.352464Z","iopub.execute_input":"2021-06-21T12:37:07.352886Z","iopub.status.idle":"2021-06-21T12:37:07.367808Z","shell.execute_reply.started":"2021-06-21T12:37:07.352854Z","shell.execute_reply":"2021-06-21T12:37:07.366250Z"},"trusted":true},"execution_count":40,"outputs":[]},{"cell_type":"code","source":"def fetch_director(text):\n    \"\"\"Parse a stringified crew list and return the names of members whose job is 'Director'.\"\"\"\n    return [member['name'] for member in ast.literal_eval(text) if member['job'] == 'Director']","metadata":{"execution":{"iopub.status.busy":"2021-06-21T12:37:09.762909Z","iopub.execute_input":"2021-06-21T12:37:09.763317Z","iopub.status.idle":"2021-06-21T12:37:09.770917Z","shell.execute_reply.started":"2021-06-21T12:37:09.763278Z","shell.execute_reply":"2021-06-21T12:37:09.770002Z"},"trusted":true},"execution_count":41,"outputs":[]},{"cell_type":"code","source":"movies['crew'] = 
# Replace each raw crew string with the list of director names that
# fetch_director extracts from it, then eyeball a few random rows.
movies['crew'] = movies['crew'].map(fetch_director)
movies.sample(5)
\n\n cast \\\n1183 [Gwyneth Paltrow, Jack Black, Jason Alexander] \n4215 [Ohad Knoller, Yousef Sweid, Daniella Wircer] \n1571 [George Clooney, Vera Farmiga, Anna Kendrick] \n205 [Robert Downey Jr., Jude Law, Jared Harris] \n352 [Eddie Murphy, Jackie Chan, Ming-Na Wen] \n\n crew \n1183 [Bobby Farrelly, Peter Farrelly] \n4215 [Eytan Fox] \n1571 [Jason Reitman] \n205 [Guy Ritchie] \n352 [Tony Bancroft, Barry Cook] ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1183</th>\n <td>9889</td>\n <td>Shallow Hal</td>\n <td>A shallow man falls in love with a 300 pound w...</td>\n <td>[Comedy, Romance]</td>\n <td>[overweight, beauty, hypnosis, overweight man,...</td>\n <td>[Gwyneth Paltrow, Jack Black, Jason Alexander]</td>\n <td>[Bobby Farrelly, Peter Farrelly]</td>\n </tr>\n <tr>\n <th>4215</th>\n <td>15976</td>\n <td>The Bubble</td>\n <td>The movie follows a group of young friends in ...</td>\n <td>[Drama, Romance]</td>\n <td>[gay]</td>\n <td>[Ohad Knoller, Yousef Sweid, Daniella Wircer]</td>\n <td>[Eytan Fox]</td>\n </tr>\n <tr>\n <th>1571</th>\n <td>22947</td>\n <td>Up in the Air</td>\n <td>George Clooney plays the dry cynical character...</td>\n <td>[Drama, Romance]</td>\n <td>[suitcase, business, omaha, on the road, downs...</td>\n <td>[George Clooney, Vera Farmiga, Anna Kendrick]</td>\n <td>[Jason Reitman]</td>\n </tr>\n <tr>\n <th>205</th>\n <td>58574</td>\n <td>Sherlock Holmes: A Game of Shadows</td>\n <td>There is a new criminal mastermind at large (P...</td>\n <td>[Adventure, Action, Crime, Mystery]</td>\n <td>[detective 
def collapse(L):
    """Return a new list with every space removed from each string in `L`.

    Used so that multi-word names such as 'Sam Worthington' become a
    single token ('SamWorthington'). The input list is not modified.
    """
    return [entry.replace(" ", "") for entry in L]
movie_id title \\\n0 19995 Avatar \n1 285 Pirates of the Caribbean: At World's End \n2 206647 Spectre \n3 49026 The Dark Knight Rises \n4 49529 John Carter \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is di... \n1 Captain Barbossa, long believed to be dead, ha... \n2 A cryptic message from Bond’s past sends him o... \n3 Following the death of District Attorney Harve... \n4 John Carter is a war-weary, former military ca... \n\n genres \\\n0 [Action, Adventure, Fantasy, ScienceFiction] \n1 [Adventure, Fantasy, Action] \n2 [Action, Adventure, Crime] \n3 [Action, Crime, Drama, Thriller] \n4 [Action, Adventure, ScienceFiction] \n\n keywords \\\n0 [cultureclash, future, spacewar, spacecolony, ... \n1 [ocean, drugabuse, exoticisland, eastindiatrad... \n2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n3 [dccomics, crimefighter, terrorist, secretiden... \n4 [basedonnovel, mars, medallion, spacetravel, p... \n\n cast crew \n0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>movie_id</th>\n <th>title</th>\n <th>overview</th>\n <th>genres</th>\n <th>keywords</th>\n <th>cast</th>\n <th>crew</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>19995</td>\n <td>Avatar</td>\n <td>In the 22nd century, a paraplegic Marine is di...</td>\n <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n <td>[SamWorthington, 
# Tokenise the overview into a local Series instead of overwriting
# movies['overview']: the original in-place split made this step fail on a
# re-run, because a list has no .split().
overview_tokens = movies['overview'].apply(lambda text: text.split())

# One combined token list per movie: overview words followed by the genre,
# keyword, cast and crew tokens (list + list concatenates row by row).
movies['tags'] = (
    overview_tokens
    + movies['genres']
    + movies['keywords']
    + movies['cast']
    + movies['crew']
)

# Drop the five source columns and store the tags as one space-separated
# string. The join always reads from movies['tags'] (a list per row) rather
# than from new['tags'], so running this step twice cannot join an
# already-joined string character by character.
new = movies.drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])
new['tags'] = movies['tags'].apply(" ".join)
new.head()
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Bag-of-words vectoriser: English stop words are removed and the
# vocabulary is capped at 5000 terms.
cv = CountVectorizer(stop_words='english', max_features=5000)

# One row of term counts per movie, converted to a dense array.
vector = cv.fit_transform(new['tags']).toarray()
vector.shape

# Pairwise cosine similarity between every pair of movie vectors.
similarity = cosine_similarity(vector)
def recommend(movie, top_n=5):
    """Print the titles of the movies most similar to `movie`.

    Finds the first row of the module-level frame `new` whose 'title'
    equals `movie`, ranks every row by its score in that row of the
    module-level `similarity` matrix (highest first), and prints the
    `top_n` best-scoring *other* titles, one per line.

    Parameters
    ----------
    movie : str
        Exact title to look up in new['title'].
    top_n : int, default 5
        How many titles to print; values below zero are treated as zero.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no row of `new` has the requested title.
    """
    # Use the row *position*, not the index label: `similarity` is indexed
    # by position and the titles are read back positionally with .iloc, so
    # a label taken from new.index only lines up when the index happens to
    # be 0..n-1.
    positions = np.flatnonzero((new['title'] == movie).to_numpy())
    if positions.size == 0:
        raise IndexError(f"no movie titled {movie!r} found in the movie list")
    query_pos = int(positions[0])

    ranked = sorted(
        enumerate(similarity[query_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Exclude the query row explicitly rather than assuming it sorts first;
    # a row with an equal score can otherwise push the query into the list.
    others = [pos for pos, _score in ranked if pos != query_pos]
    for pos in others[:max(top_n, 0)]:
        print(new['title'].iloc[pos])
# Smoke-test the recommender on one title.
recommend('Gandhi')

import pickle

# Save the movie table and the similarity matrix for reuse outside the
# notebook. Each file is opened in a `with` block so its handle is closed
# (and the data flushed to disk) as soon as the dump finishes, instead of
# being left open until garbage collection.
with open('movie_list.pkl', 'wb') as movie_list_file:
    pickle.dump(new, movie_list_file)

with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)