tahirsher committed on
Commit
7f89231
·
verified ·
1 Parent(s): 1375b92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -25
app.py CHANGED
@@ -1,20 +1,36 @@
1
  import os
2
  import pandas as pd
 
 
 
3
  from groq import Groq
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
- import gradio as gr
7
  import numpy as np
8
 
9
- # Initialize the Groq client (add your API key directly in the environment)
10
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
11
 
12
- # Load the proprietary dataset from the backend folder
13
- dataset_path = "dataset/movie_dataset.csv"
14
- movies_df = pd.read_csv(dataset_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Preprocess the dataset by creating summaries and vectors
17
  def preprocess_data(df):
 
18
  df['summary'] = df.apply(lambda row: f"{row['title']} ({row['release_date']}): {row['overview']} "
19
  f"Genres: {row['genres']} Keywords: {row['keywords']}", axis=1)
20
  return df
@@ -42,24 +58,30 @@ def generate_summary_with_groq(query, retrieved_text):
42
  )
43
  return chat_completion.choices[0].message.content
44
 
45
- # Gradio interface function for the application
46
- def rag_application(user_query):
47
- # Retrieve relevant movie summaries
48
- retrieved_movies = retrieve_similar_movies(user_query, movies_df, tfidf_matrix)
49
- retrieved_summaries = " ".join(retrieved_movies['summary'].values)
50
-
51
- # Generate a summary response based on retrieved movies
52
- generated_summary = generate_summary_with_groq(user_query, retrieved_summaries)
53
- return generated_summary
54
-
55
- # Deploy using Gradio on Hugging Face
56
- interface = gr.Interface(
57
- fn=rag_application,
58
- inputs="text",
59
- outputs="text",
60
- title="Movie RAG-based Application",
61
- description="Ask questions about movies. Proprietary dataset only accessible to the authorized user."
62
- )
 
 
 
 
 
 
63
 
64
- if __name__ == "__main__":
65
- interface.launch()
 
1
  import os
2
  import pandas as pd
3
+ import zipfile
4
+ import requests
5
+ import io
6
  from groq import Groq
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
 
9
  import numpy as np
10
 
11
+ # Initialize the Groq client
12
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
13
 
14
+ # Load the proprietary dataset from GitHub
15
def load_dataset_from_github(zip_url, csv_name="movie_dataset.csv",
                             extract_dir="dataset", timeout=30):
    """Download a ZIP archive and load one CSV file from it as a DataFrame.

    Args:
        zip_url: URL of the ZIP archive to download.
        csv_name: File name of the CSV to load from the archive.
        extract_dir: Directory the archive is extracted into.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the application forever.

    Returns:
        A pandas DataFrame read from the extracted CSV file.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
        FileNotFoundError: If the archive contains no file named ``csv_name``.
    """
    response = requests.get(zip_url, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download the dataset (HTTP {response.status_code})."
        )

    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall(extract_dir)
        # GitHub branch archives wrap the repository contents in a top-level
        # "<repo>-<branch>/" folder, so the CSV is usually not at the archive
        # root. Look the member up by file name instead of assuming its path.
        candidates = [
            member for member in archive.namelist()
            if member == csv_name or member.endswith("/" + csv_name)
        ]

    if not candidates:
        raise FileNotFoundError(
            f"'{csv_name}' was not found in the archive downloaded from {zip_url}"
        )

    # If several files share the name, prefer the one closest to the root.
    member = min(candidates, key=lambda path: path.count("/"))
    return pd.read_csv(os.path.join(extract_dir, member))
26
+
27
+ # URL of the ZIP file containing the dataset on GitHub
28
+ zip_url = "https://github.com/YourUsername/movie-dataset/archive/refs/heads/main.zip"
29
+ movies_df = load_dataset_from_github(zip_url)
30
 
31
  # Preprocess the dataset by creating summaries and vectors
32
def preprocess_data(df):
    """Add a ``summary`` text column built from each movie's metadata.

    The summary has the form
    ``"<title> (<release_date>): <overview> Genres: <genres> Keywords: <keywords>"``.
    Missing values are rendered as empty strings rather than the literal
    text "nan", so they do not leak into the summary text.

    Args:
        df: DataFrame with ``title``, ``release_date``, ``overview``,
            ``genres`` and ``keywords`` columns.

    Returns:
        The same DataFrame object, modified in place with the new column.
    """
    columns = ['title', 'release_date', 'overview', 'genres', 'keywords']
    # Work on an object-typed copy so blanks can replace missing values
    # regardless of each column's original dtype.
    text = df[columns].astype(object)
    text = text.where(text.notna(), "").astype(str)

    # Column-wise concatenation also works on an empty frame, where a
    # row-wise apply would return a DataFrame instead of a Series.
    df['summary'] = (
        text['title'] + " (" + text['release_date'] + "): " + text['overview']
        + " Genres: " + text['genres'] + " Keywords: " + text['keywords']
    )
    return df
 
58
  )
59
  return chat_completion.choices[0].message.content
60
 
61
+ # Main interactive loop
62
def rag_application():
    """Run the interactive command-line question/answer loop.

    Repeatedly prompts for a movie question, passes it to
    ``retrieve_similar_movies`` together with the module-level ``movies_df``
    and ``tfidf_matrix``, joins the retrieved ``summary`` values, and prints
    the text returned by ``generate_summary_with_groq``.

    The loop ends when the user types ``exit``, ``no`` or ``quit`` as a
    query, answers anything other than ``yes``/``y`` to the follow-up
    prompt, or when standard input is closed or interrupted.
    """
    exit_words = {'exit', 'no', 'quit'}
    affirmative = {'yes', 'y'}
    farewell = "Exiting the application. Goodbye!"

    print("Welcome to the Movie RAG-based Application!")
    while True:
        # Prompt user for a query
        try:
            user_query = input("Ask a question about movies or type 'exit' to quit: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave cleanly instead of showing a traceback.
            print()
            print(farewell)
            break

        # Ignore surrounding whitespace so " exit " still quits.
        user_query = user_query.strip()
        if user_query.lower() in exit_words:
            print(farewell)
            break
        if not user_query:
            # Nothing to search for; ask again rather than querying with "".
            continue

        # Retrieve relevant movie summaries
        retrieved_movies = retrieve_similar_movies(user_query, movies_df, tfidf_matrix)
        retrieved_summaries = " ".join(retrieved_movies['summary'].values)

        # Generate a summary response based on retrieved movies
        generated_summary = generate_summary_with_groq(user_query, retrieved_summaries)
        print("Generated Summary:", generated_summary)

        # Ask if user wants to continue or exit
        try:
            continue_query = input("Do you have another question? (yes/no): ")
        except (EOFError, KeyboardInterrupt):
            print()
            print(farewell)
            break
        if continue_query.strip().lower() not in affirmative:
            print(farewell)
            break
85
 
86
+ # Run the application
87
+ rag_application()