Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -727,7 +727,7 @@ def find_similar_movies(query_description, top_n=3):
|
|
| 727 |
return movie_kb.find_similar_movies(query_description, top_n)
|
| 728 |
|
| 729 |
# Original functions for analysis
|
| 730 |
-
def predict_box_office(movie_description, similar_movies):
|
| 731 |
|
| 732 |
import numpy as np
|
| 733 |
|
|
@@ -742,9 +742,13 @@ def predict_box_office(movie_description, similar_movies):
|
|
| 742 |
for movie_info in similar_movies:
|
| 743 |
movie = movie_info["movie"]
|
| 744 |
sim_score = movie_info["similarity_score"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 745 |
inflation_factor = get_inflation_adjustment(movie["year"])
|
| 746 |
year_weight = get_year_weight(movie["year"])
|
| 747 |
-
adjusted_bo = movie["box_office"] * year_weight *
|
| 748 |
adjusted_box_offices.append(sim_score * adjusted_bo)
|
| 749 |
total_weight += sim_score * year_weight
|
| 750 |
|
|
@@ -765,7 +769,7 @@ def predict_box_office(movie_description, similar_movies):
|
|
| 765 |
genre_movies = [m for m in movie_knowledge_base if genre in m["genre"] and m["budget"] > 0]
|
| 766 |
if not genre_movies:
|
| 767 |
continue
|
| 768 |
-
success_rate = sum(1 for m in genre_movies if (m["box_office"] / m["budget"]) >=
|
| 769 |
rois = [m["box_office"] / m["budget"] for m in genre_movies]
|
| 770 |
roi_median = np.median(rois)
|
| 771 |
revenues = [m["box_office"] for m in genre_movies]
|
|
@@ -789,7 +793,6 @@ def predict_box_office(movie_description, similar_movies):
|
|
| 789 |
|
| 790 |
return simulations
|
| 791 |
|
| 792 |
-
|
| 793 |
def predict_awards(movie_description, similar_movies):
|
| 794 |
"""Predict potential awards based on similar movies."""
|
| 795 |
# Count awards in similar movies and recommend the most common ones
|
|
@@ -845,8 +848,17 @@ def get_similar_movies(movie_description: str):
|
|
| 845 |
@log_function_tool(logger)
|
| 846 |
def get_box_office_prediction(movie_description: str):
|
| 847 |
"""Predict the box office revenue for a movie based on its description."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 848 |
similar_movies = find_similar_movies(movie_description, top_n=3)
|
| 849 |
-
simulations = predict_box_office(movie_description, similar_movies)
|
| 850 |
baseline_prediction = np.median(simulations)
|
| 851 |
lower_bound = np.percentile(simulations, 25)
|
| 852 |
upper_bound = np.percentile(simulations, 75)
|
|
@@ -857,8 +869,8 @@ def get_box_office_prediction(movie_description: str):
|
|
| 857 |
else:
|
| 858 |
avg_budget = 1
|
| 859 |
|
| 860 |
-
threshold_exceed = 3 *
|
| 861 |
-
threshold_below = 2 *
|
| 862 |
prob_exceed = float(np.mean(simulations > threshold_exceed))
|
| 863 |
prob_below = float(np.mean(simulations < threshold_below))
|
| 864 |
|
|
@@ -882,7 +894,6 @@ def get_box_office_prediction(movie_description: str):
|
|
| 882 |
},
|
| 883 |
"similar_movies": similar_movie_info
|
| 884 |
}
|
| 885 |
-
|
| 886 |
@function_tool
|
| 887 |
@log_function_tool(logger)
|
| 888 |
def get_award_predictions(movie_description: str):
|
|
|
|
| 727 |
return movie_kb.find_similar_movies(query_description, top_n)
|
| 728 |
|
| 729 |
# Original functions for analysis
|
| 730 |
+
def predict_box_office(movie_description, similar_movies, target_budget):
|
| 731 |
|
| 732 |
import numpy as np
|
| 733 |
|
|
|
|
| 742 |
for movie_info in similar_movies:
|
| 743 |
movie = movie_info["movie"]
|
| 744 |
sim_score = movie_info["similarity_score"]
|
| 745 |
+
if movie["budget"] > 0:
|
| 746 |
+
budget_ratio = target_budget / movie["budget"]
|
| 747 |
+
else:
|
| 748 |
+
budget_ratio = 1
|
| 749 |
inflation_factor = get_inflation_adjustment(movie["year"])
|
| 750 |
year_weight = get_year_weight(movie["year"])
|
| 751 |
+
adjusted_bo = movie["box_office"] * year_weight * budget_ratio
|
| 752 |
adjusted_box_offices.append(sim_score * adjusted_bo)
|
| 753 |
total_weight += sim_score * year_weight
|
| 754 |
|
|
|
|
| 769 |
genre_movies = [m for m in movie_knowledge_base if genre in m["genre"] and m["budget"] > 0]
|
| 770 |
if not genre_movies:
|
| 771 |
continue
|
| 772 |
+
success_rate = sum(1 for m in genre_movies if (m["box_office"] / m["budget"]) >= 3) / len(genre_movies)
|
| 773 |
rois = [m["box_office"] / m["budget"] for m in genre_movies]
|
| 774 |
roi_median = np.median(rois)
|
| 775 |
revenues = [m["box_office"] for m in genre_movies]
|
|
|
|
| 793 |
|
| 794 |
return simulations
|
| 795 |
|
|
|
|
| 796 |
def predict_awards(movie_description, similar_movies):
|
| 797 |
"""Predict potential awards based on similar movies."""
|
| 798 |
# Count awards in similar movies and recommend the most common ones
|
|
|
|
| 848 |
@log_function_tool(logger)
|
| 849 |
def get_box_office_prediction(movie_description: str):
|
| 850 |
"""Predict the box office revenue for a movie based on its description."""
|
| 851 |
+
target_budget = None
|
| 852 |
+
import re
|
| 853 |
+
match = re.search(r"Budget:\s*:\s*(\d+)", movie_description, re.IGNORECASE)
|
| 854 |
+
if match:
|
| 855 |
+
target_budget = float(match.group(1))
|
| 856 |
+
else:
|
| 857 |
+
target_budget = 10000000
|
| 858 |
+
# print(f"the movie budget is {target_budget}")
|
| 859 |
+
|
| 860 |
similar_movies = find_similar_movies(movie_description, top_n=3)
|
| 861 |
+
simulations = predict_box_office(movie_description, similar_movies, target_budget)
|
| 862 |
baseline_prediction = np.median(simulations)
|
| 863 |
lower_bound = np.percentile(simulations, 25)
|
| 864 |
upper_bound = np.percentile(simulations, 75)
|
|
|
|
| 869 |
else:
|
| 870 |
avg_budget = 1
|
| 871 |
|
| 872 |
+
threshold_exceed = 3 * target_budget
|
| 873 |
+
threshold_below = 2 * target_budget
|
| 874 |
prob_exceed = float(np.mean(simulations > threshold_exceed))
|
| 875 |
prob_below = float(np.mean(simulations < threshold_below))
|
| 876 |
|
|
|
|
| 894 |
},
|
| 895 |
"similar_movies": similar_movie_info
|
| 896 |
}
|
|
|
|
| 897 |
@function_tool
|
| 898 |
@log_function_tool(logger)
|
| 899 |
def get_award_predictions(movie_description: str):
|