tahirsher committed on
Commit
89d557f
·
verified ·
1 Parent(s): a2787fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -34
app.py CHANGED
@@ -1,36 +1,33 @@
1
  import os
2
  import pandas as pd
3
- import zipfile
4
  import requests
5
  import io
6
  from groq import Groq
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
- import numpy as np
10
 
11
  # Initialize the Groq client
12
- client = Groq(api_key=os.environ.get("Groq_Api_Key"))
13
 
14
  # Load the proprietary dataset from GitHub
15
- def load_dataset_from_github(zip_url):
16
- # Download the zip file
17
- response = requests.get(zip_url)
18
  if response.status_code == 200:
19
- # Extract the zip file
20
- with zipfile.ZipFile(io.BytesIO(response.content)) as z:
21
- z.extractall("dataset") # Extract to the 'dataset' folder
22
- # Load the CSV file (assuming it's named 'movie_dataset.csv' inside the zip)
23
- return pd.read_csv("dataset/movie_dataset.csv")
24
  else:
25
  raise Exception("Failed to download the dataset.")
26
 
27
- # URL of the ZIP file containing the dataset on GitHub
28
- zip_url = "https://github.com/TahirSher/RAG_App_Moives_Datset/blob/main/compressed_data.csv.gz"
29
- movies_df = load_dataset_from_github(zip_url)
30
 
31
  # Preprocess the dataset by creating summaries and vectors
32
  def preprocess_data(df):
33
- # Combine relevant text columns to form a concise summary for each movie
34
  df['summary'] = df.apply(lambda row: f"{row['title']} ({row['release_date']}): {row['overview']} "
35
  f"Genres: {row['genres']} Keywords: {row['keywords']}", axis=1)
36
  return df
@@ -58,30 +55,28 @@ def generate_summary_with_groq(query, retrieved_text):
58
  )
59
  return chat_completion.choices[0].message.content
60
 
61
- # Main interactive loop
62
- def rag_application():
63
- print("Welcome to the Movie RAG-based Application!")
64
- while True:
65
- # Prompt user for a query
66
- user_query = input("Ask a question about movies or type 'exit' to quit: ")
67
-
68
- if user_query.lower() in ['exit', 'no', 'quit']:
69
- print("Exiting the application. Goodbye!")
70
- break
71
-
72
  # Retrieve relevant movie summaries
73
  retrieved_movies = retrieve_similar_movies(user_query, movies_df, tfidf_matrix)
74
  retrieved_summaries = " ".join(retrieved_movies['summary'].values)
75
 
76
  # Generate a summary response based on retrieved movies
77
  generated_summary = generate_summary_with_groq(user_query, retrieved_summaries)
78
- print("Generated Summary:", generated_summary)
79
 
80
- # Ask if user wants to continue or exit
81
- continue_query = input("Do you have another question? (yes/no): ")
82
- if continue_query.lower() != 'yes':
83
- print("Exiting the application. Goodbye!")
84
- break
 
 
85
 
86
- # Run the application
87
- rag_application()
 
1
  import os
2
  import pandas as pd
3
+ import gzip
4
  import requests
5
  import io
6
  from groq import Groq
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ import streamlit as st
10
 
11
  # Initialize the Groq client
12
+ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
13
 
14
  # Load the proprietary dataset from GitHub
15
+ def load_dataset_from_github(gzip_url):
16
+ # Download the Gzip file
17
+ response = requests.get(gzip_url)
18
  if response.status_code == 200:
19
+ # Load the Gzip file and read the CSV
20
+ with gzip.open(io.BytesIO(response.content), 'rt') as f:
21
+ return pd.read_csv(f)
 
 
22
  else:
23
  raise Exception("Failed to download the dataset.")
24
 
25
+ # URL of the Gzip file containing the dataset on GitHub
26
+ gzip_url = "https://github.com/TahirSher/RAG_App_Moives_Datset/raw/main/compressed_data.csv.gz"
27
+ movies_df = load_dataset_from_github(gzip_url)
28
 
29
  # Preprocess the dataset by creating summaries and vectors
30
  def preprocess_data(df):
 
31
  df['summary'] = df.apply(lambda row: f"{row['title']} ({row['release_date']}): {row['overview']} "
32
  f"Genres: {row['genres']} Keywords: {row['keywords']}", axis=1)
33
  return df
 
55
  )
56
  return chat_completion.choices[0].message.content
57
 
58
+ # Streamlit Application
59
+ def main():
60
+ st.title("Movie RAG-based Application")
61
+
62
+ # User input
63
+ user_query = st.text_input("Ask a question about movies:")
64
+
65
+ if user_query:
 
 
 
66
  # Retrieve relevant movie summaries
67
  retrieved_movies = retrieve_similar_movies(user_query, movies_df, tfidf_matrix)
68
  retrieved_summaries = " ".join(retrieved_movies['summary'].values)
69
 
70
  # Generate a summary response based on retrieved movies
71
  generated_summary = generate_summary_with_groq(user_query, retrieved_summaries)
 
72
 
73
+ # Display the generated summary
74
+ st.subheader("Generated Summary:")
75
+ st.write(generated_summary)
76
+
77
+ # Option to ask another question
78
+ if st.button("Ask another question"):
79
+ st.experimental_rerun()
80
 
81
+ if __name__ == "__main__":
82
+ main()