noumanjavaid committed on
Commit
70f8027
·
verified ·
1 Parent(s): aeb1693

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +322 -0
app.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ import time
5
+ from datetime import datetime, timedelta
6
+ from urllib.parse import urlencode
7
+ from openai import OpenAI
8
+ from io import StringIO
9
+ import json
10
+
11
# Page configuration and heading
st.set_page_config(page_title="Steam App Reviews - Themes Analysis")
st.title("🎮 Steam App Reviews - Themes Analysis")

# Everything the user can configure lives in the sidebar
st.sidebar.header("User Input Parameters")

# OpenAI API key (masked input)
api_key_input = st.sidebar.text_input(
    "Enter your OpenAI API Key:",
    type="password",
    help="Your API key will be used to access the OpenAI API for theme extraction.",
)

# Build the OpenAI client only once a key has been supplied; otherwise nudge the user
client = OpenAI(api_key=api_key_input) if api_key_input else None
if client is None:
    st.sidebar.warning("Please enter your OpenAI API Key to proceed.")

# Steam App ID whose reviews will be analysed
appid = st.sidebar.text_input("Enter the Steam App ID:", value="1782120")

# Review date range; defaults to the seven days ending today
st.sidebar.write("Select the date range for reviews:")
start_date = st.sidebar.date_input(
    "Start Date",
    value=datetime.today() - timedelta(days=7),
)
end_date = st.sidebar.date_input(
    "End Date",
    value=datetime.today(),
)
44
+
45
+ # Validate dates
46
+ if start_date > end_date:
47
+ st.error("Error: End date must fall after start date.")
48
+ elif not api_key_input:
49
+ st.info("Please input your OpenAI API Key to proceed.")
50
+ else:
51
+ # Fetch reviews button
52
+ if st.sidebar.button("Fetch and Analyze Reviews"):
53
+ st.write("Fetching reviews...")
54
+
55
+ # Convert dates to timestamps
56
+ start_timestamp = int(time.mktime(start_date.timetuple()))
57
+ end_timestamp = int(
58
+ time.mktime((end_date + timedelta(days=1)).timetuple())
59
+ ) - 1 # Include the entire end date
60
+
61
+ # Define the base API URL
62
+ base_url = f"https://store.steampowered.com/appreviews/{appid}?json=1"
63
+
64
+ # Define initial API parameters
65
+ params = {
66
+ "filter": "recent",
67
+ "language": "all",
68
+ "day_range": "365", # Maximum allowed
69
+ "review_type": "all",
70
+ "purchase_type": "all",
71
+ "num_per_page": "100",
72
+ "cursor": "*",
73
+ "filter_offtopic_activity": 0,
74
+ }
75
+
76
+ # Function to fetch reviews
77
def fetch_reviews(max_reviews=200):
    """
    Fetch Steam reviews for the selected app that fall inside the chosen date range.

    Pages through the Steam ``appreviews`` endpoint by following its cursor.
    Pagination stops as soon as one of the following happens:

    * ``max_reviews`` matching reviews have been collected,
    * a review older than ``start_timestamp`` is seen (results are requested
      with ``filter=recent``, i.e. newest first, so nothing later can match),
    * Steam returns an empty page, or a missing/unchanged cursor,
    * the request cap is reached (a warning is shown).

    Reads ``base_url``, ``params``, ``start_timestamp`` and ``end_timestamp``
    from the enclosing scope. ``params`` is not modified, so every call starts
    from the initial cursor.

    Args:
        max_reviews (int): Maximum number of reviews to fetch.

    Returns:
        list: Reviews within the specified date range, or ``None`` if a
        request failed or Steam reported failure (an error is shown).
    """
    reviews_list = []
    request_count = 0
    max_requests = 50  # Limit the number of requests to avoid infinite loops
    cursor = params["cursor"]

    while True:
        # urlencode() already percent-encodes the cursor (including "+").
        # Escaping it by hand first would double-encode it ("%2B" -> "%252B")
        # and Steam would receive a corrupted cursor.
        query = dict(params, cursor=cursor)
        url = base_url + "&" + urlencode(query)

        # Make the API request; a timeout keeps the app from hanging forever
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers invalid JSON on requests versions whose
            # JSON decode error is not a RequestException subclass.
            st.error(f"Steam API Error: {e}")
            return None

        # Check if the request was successful
        if data.get("success") != 1:
            st.error("Failed to fetch reviews from Steam API.")
            return None

        reviews = data.get("reviews", [])
        if not reviews:
            # No more reviews
            break

        reached_older_reviews = False
        for review in reviews:
            created = review.get("timestamp_created", 0)
            if created < start_timestamp:
                # Newest-first ordering: everything after this is older still
                reached_older_reviews = True
                break
            if created <= end_timestamp:
                reviews_list.append(review)
                if len(reviews_list) >= max_reviews:
                    break

        # Stop paginating once the range is exhausted or the quota is filled
        if reached_older_reviews or len(reviews_list) >= max_reviews:
            break

        # Advance the cursor; a missing or unchanged cursor means no further pages
        new_cursor = data.get("cursor")
        if not new_cursor or new_cursor == cursor:
            break
        cursor = new_cursor

        # Increment request count and check limit
        request_count += 1
        if request_count >= max_requests:
            st.warning("Reached maximum number of requests. Some reviews may not be fetched.")
            break

        # Small pause between pages to avoid hitting the rate limit
        time.sleep(0.2)

    return reviews_list
151
+
152
+ # Fetch the reviews
153
+ reviews_data = fetch_reviews(max_reviews=200)
154
+
155
+ # Check if reviews were fetched
156
+ if reviews_data:
157
+ st.success(f"Fetched {len(reviews_data)} reviews from App ID {appid}.")
158
+
159
+ # Create a DataFrame from the review data
160
+ df = pd.DataFrame(
161
+ [
162
+ {
163
+ "Review ID": str(review.get("recommendationid")),
164
+ "Author SteamID": review.get("author", {}).get("steamid"),
165
+ "Language": review.get("language"),
166
+ "Review": review.get("review"),
167
+ "Posted On": datetime.fromtimestamp(
168
+ review.get("timestamp_created", 0)
169
+ ).strftime("%Y-%m-%d %H:%M:%S"),
170
+ }
171
+ for review in reviews_data
172
+ ]
173
+ )
174
+
175
+ # Function to extract themes using OpenAI GPT-4o
176
def extract_themes(df):
    """
    Use OpenAI's GPT-4o model to identify the most common themes in the reviews.

    The reviews are sent in a single prompt and the reply is constrained by a
    strict JSON schema of the form ``{"parameters": {"themes": [...]}}``, where
    each theme has a ``Theme``, a ``Description`` and a list of ``Review IDs``.

    Uses the module-level ``client`` for the API call and reports failures
    through ``st.error``.

    Args:
        df (DataFrame): DataFrame containing the reviews; the "Review ID" and
            "Review" columns are read.

    Returns:
        DataFrame: One row per theme with the columns described above, or
        ``None`` if the API call failed, the model returned no content, or the
        reply could not be parsed.
    """
    # Imported locally so this function does not rely on a file-level import
    from openai import OpenAIError

    # Combine reviews into a single string with IDs
    reviews_text = "\n".join(
        f"Review ID: {review_id}\nReview Text: {review_text}"
        for review_id, review_text in zip(df["Review ID"], df["Review"])
    )

    # NOTE(review): the prompt asks for a bare JSON array while the strict
    # schema below requires an object wrapping it; the schema is what
    # constrains the reply, and the parsing further down follows the schema.
    prompt = f"""
Analyze the following user reviews and identify the most common themes or topics being discussed.
For each theme, provide a brief description and list the Review IDs where the theme is mentioned.

Provide the output as a JSON array matching the following structure:
[
{{"Theme": "<theme_name>", "Description": "<description>", "Review IDs": ["<review_id1>", "<review_id2>", ...]}},
...
]

Ensure the output is valid JSON.

Reviews:
{reviews_text}
"""

    # Schema for a single theme entry
    theme_schema = {
        "type": "object",
        "properties": {
            "Theme": {
                "type": "string",
                "description": "The main theme derived from the game reviews.",
            },
            "Description": {
                "type": "string",
                "description": "A summary of the issues or sentiments related to the theme.",
            },
            "Review IDs": {
                "type": "array",
                "description": "Array of IDs for reviews that relate to this theme.",
                "items": {
                    "type": "string",
                    "description": "Unique identifier for a review.",
                },
            },
        },
        "required": [
            "Theme",
            "Description",
            "Review IDs",
        ],
        "additionalProperties": False,
    }

    # Call OpenAI API; surface API failures (bad key, rate limit, network)
    # as a message instead of an unhandled traceback
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=1,
            max_tokens=2048,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "analyze_game_reviews",
                    "strict": True,
                    "schema": {
                        "type": "object",
                        "properties": {
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "themes": {
                                        "type": "array",
                                        "description": "List of themes identified in the game reviews",
                                        "items": theme_schema,
                                    }
                                },
                                "required": ["themes"],
                                "additionalProperties": False,
                            }
                        },
                        "required": ["parameters"],
                        "additionalProperties": False,
                    },
                },
            },
        )
    except OpenAIError as e:
        st.error(f"OpenAI API Error: {e}")
        return None

    # Get the response content; it is None when the model refuses to answer
    message = response.choices[0].message
    response_text = message.content
    if not response_text:
        refusal = getattr(message, "refusal", None)
        st.error(f"OpenAI returned no content to parse. Refusal: {refusal}")
        return None

    # Parse the JSON output
    try:
        response_data = json.loads(response_text)
        # Extract the 'themes' list from the schema's wrapper object
        themes_list = response_data.get("parameters", {}).get("themes", [])
        return pd.DataFrame(themes_list)
    except (ValueError, AttributeError) as ve:
        # ValueError: invalid or truncated JSON; AttributeError: JSON that is
        # valid but not the expected object shape
        st.error(f"Error parsing JSON response: {ve}")
        st.error(f"Raw API Response: {response_text}")
        return None
289
+
290
+ # Extract themes
291
+ themes_df = extract_themes(df)
292
+
293
+ if themes_df is not None:
294
+ st.write("### Most Common Themes")
295
+ st.dataframe(themes_df)
296
+
297
+ # Display detailed information for each theme
298
+ for index, row in themes_df.iterrows():
299
+ theme = row["Theme"]
300
+ description = row["Description"]
301
+ review_ids = row["Review IDs"]
302
+
303
+ st.write(f"#### Theme: {theme}")
304
+ st.write(f"**Description:** {description}")
305
+ st.write(f"**Mentioned in {len(review_ids)} reviews.**")
306
+
307
+ # Get the reviews that mention the theme
308
+ try:
309
+ reviews_with_theme = df[
310
+ df["Review ID"].isin(review_ids)
311
+ ][["Review ID", "Review"]]
312
+ st.write("Reviews mentioning this theme:")
313
+ st.dataframe(reviews_with_theme)
314
+ except ValueError as e:
315
+ st.error(
316
+ f"Error displaying reviews for theme '{theme}': {e}"
317
+ )
318
+ else:
319
+ st.warning("Failed to extract themes.")
320
+
321
+ else:
322
+ st.warning("No reviews to display.")