Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
import time
from datetime import datetime, timedelta
from io import StringIO
from urllib.parse import urlencode

import pandas as pd
import requests
import streamlit as st
from openai import OpenAI
from openai import OpenAIError
|
10 |
+
|
11 |
+
# Configure the page and render its heading
st.set_page_config(page_title="Steam App Reviews - Themes Analysis")
st.title("🎮 Steam App Reviews - Themes Analysis")

# All user-supplied parameters are collected in the sidebar
st.sidebar.header("User Input Parameters")

# OpenAI API key (masked input)
api_key_input = st.sidebar.text_input(
    label="Enter your OpenAI API Key:",
    help="Your API key will be used to access the OpenAI API for theme extraction.",
    type="password",
)

# Build the OpenAI client only when a key was supplied; otherwise ask for one
client = OpenAI(api_key=api_key_input) if api_key_input else None
if client is None:
    st.sidebar.warning("Please enter your OpenAI API Key to proceed.")

# Steam App ID of the game whose reviews will be analyzed
appid = st.sidebar.text_input(label="Enter the Steam App ID:", value="1782120")

# Review date range; defaults to the seven days ending today
st.sidebar.write("Select the date range for reviews:")
start_date = st.sidebar.date_input(
    label="Start Date",
    value=datetime.today() - timedelta(weeks=1),
)
end_date = st.sidebar.date_input(label="End Date", value=datetime.today())
|
44 |
+
|
45 |
+
# Validate the sidebar inputs; the action button is only rendered once they pass.
app_id = appid.strip()

if start_date > end_date:
    st.error("Error: End date must fall after start date.")
elif not api_key_input:
    st.info("Please input your OpenAI API Key to proceed.")
elif not app_id.isdigit():
    # The ID is interpolated into the request path below, so reject anything
    # that is not a plain number instead of sending a malformed URL.
    st.error("Error: Steam App ID must be a number.")
elif st.sidebar.button("Fetch and Analyze Reviews"):
    st.write("Fetching reviews...")

    # Convert the selected dates to local-time Unix timestamps.
    start_timestamp = int(time.mktime(start_date.timetuple()))
    # Midnight of the following day minus one second, so the whole end date
    # is included in the range.
    end_timestamp = (
        int(time.mktime((end_date + timedelta(days=1)).timetuple())) - 1
    )

    # Base URL of the Steam reviews endpoint for this app
    base_url = f"https://store.steampowered.com/appreviews/{app_id}?json=1"

    # Query parameters for the first page; "*" is the initial cursor value
    params = {
        "filter": "recent",
        "language": "all",
        "day_range": "365",  # Maximum allowed
        "review_type": "all",
        "purchase_type": "all",
        "num_per_page": "100",
        "cursor": "*",
        "filter_offtopic_activity": 0,
    }

    def fetch_reviews(max_reviews=200, max_requests=50, timeout=10):
        """
        Fetch Steam reviews for the selected app that fall inside the date range.

        Pages through the reviews endpoint using the cursor returned by each
        response and keeps the reviews whose ``timestamp_created`` lies between
        ``start_timestamp`` and ``end_timestamp`` (both inclusive). Paging stops
        when a page is empty, a review older than the range is seen, enough
        reviews were collected, the cursor stops advancing, or the request
        budget is used up.

        Args:
            max_reviews (int): Maximum number of reviews to collect.
            max_requests (int): Maximum number of pages to request.
            timeout (float): Seconds to wait for each HTTP response.

        Returns:
            list | None: The matching reviews, or None if a request failed or
            Steam reported an unsuccessful response (an error is shown).
        """
        reviews_list = []
        cursor = params["cursor"]

        for _ in range(max_requests):
            try:
                # Let requests encode the query string. Escaping "+" by hand
                # and URL-encoding the result again would produce "%252B" and
                # corrupt the cursor.
                response = requests.get(
                    base_url,
                    params={**params, "cursor": cursor},
                    timeout=timeout,
                )
                response.raise_for_status()
                data = response.json()
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers a non-JSON body on requests versions whose
                # JSON error is not a RequestException.
                st.error(f"Steam API Error: {e}")
                return None

            # Check if the request was successful
            if not isinstance(data, dict) or data.get("success") != 1:
                st.error("Failed to fetch reviews from Steam API.")
                return None

            reviews = data.get("reviews", [])
            if not reviews:
                # No more reviews
                break

            reached_older_reviews = False
            for review in reviews:
                created = review.get("timestamp_created", 0)
                if created < start_timestamp:
                    # The "recent" filter is expected to return newest first,
                    # so everything after this review is out of range too.
                    reached_older_reviews = True
                    break
                if created <= end_timestamp:
                    reviews_list.append(review)
                    if len(reviews_list) >= max_reviews:
                        break

            # Stop paging entirely, not just the scan of the current page.
            if reached_older_reviews or len(reviews_list) >= max_reviews:
                break

            # A missing or unchanged cursor means there is nothing further to
            # page through.
            new_cursor = data.get("cursor")
            if not new_cursor or new_cursor == cursor:
                break
            cursor = new_cursor

            # Small pause between pages to avoid hitting the rate limit
            time.sleep(0.2)
        else:
            # The loop ran out of requests without hitting any stop condition.
            st.warning(
                "Reached maximum number of requests. Some reviews may not be fetched."
            )

        return reviews_list

    # Fetch the reviews
    reviews_data = fetch_reviews(max_reviews=200)

    if reviews_data:
        st.success(f"Fetched {len(reviews_data)} reviews from App ID {app_id}.")

        # Create a DataFrame from the review data
        df = pd.DataFrame(
            [
                {
                    "Review ID": str(review.get("recommendationid")),
                    "Author SteamID": review.get("author", {}).get("steamid"),
                    "Language": review.get("language"),
                    "Review": review.get("review"),
                    "Posted On": datetime.fromtimestamp(
                        review.get("timestamp_created", 0)
                    ).strftime("%Y-%m-%d %H:%M:%S"),
                }
                for review in reviews_data
            ]
        )

        def extract_themes(df):
            """
            Ask OpenAI's GPT-4o model for the most common themes in the reviews.

            Sends every review (with its ID) in one prompt and requests a
            structured JSON answer listing each theme, a description, and the
            IDs of the reviews that mention it.

            Args:
                df (DataFrame): Reviews with "Review ID" and "Review" columns.

            Returns:
                DataFrame | None: One row per theme with the columns "Theme",
                "Description" and "Review IDs", or None if the API call failed
                or its answer could not be used (an error is shown).
            """
            # Combine reviews into a single string with IDs
            reviews_text = "\n".join(
                f"Review ID: {review_id}\nReview Text: {text}"
                for review_id, text in zip(df["Review ID"], df["Review"])
            )

            # The format described here matches the schema enforced below, so
            # the instructions and the structured-output constraint agree.
            prompt = (
                "Analyze the following user reviews and identify the most "
                "common themes or topics being discussed.\n"
                "For each theme, provide a brief description and list the "
                "Review IDs where the theme is mentioned.\n\n"
                "Provide the output as a JSON object matching the following "
                "structure:\n"
                '{"parameters": {"themes": [{"Theme": "<theme_name>", '
                '"Description": "<description>", '
                '"Review IDs": ["<review_id1>", "<review_id2>"]}]}}\n\n'
                "Only use Review IDs that appear in the reviews below.\n\n"
                f"Reviews:\n{reviews_text}"
            )

            # Schema of a single theme entry
            theme_schema = {
                "type": "object",
                "properties": {
                    "Theme": {
                        "type": "string",
                        "description": "The main theme derived from the game reviews.",
                    },
                    "Description": {
                        "type": "string",
                        "description": "A summary of the issues or sentiments related to the theme.",
                    },
                    "Review IDs": {
                        "type": "array",
                        "description": "Array of IDs for reviews that relate to this theme.",
                        "items": {
                            "type": "string",
                            "description": "Unique identifier for a review.",
                        },
                    },
                },
                "required": ["Theme", "Description", "Review IDs"],
                "additionalProperties": False,
            }

            # Strict structured-output format: {"parameters": {"themes": [...]}}
            response_format = {
                "type": "json_schema",
                "json_schema": {
                    "name": "analyze_game_reviews",
                    "strict": True,
                    "schema": {
                        "type": "object",
                        "properties": {
                            "parameters": {
                                "type": "object",
                                "properties": {
                                    "themes": {
                                        "type": "array",
                                        "description": "List of themes identified in the game reviews",
                                        "items": theme_schema,
                                    }
                                },
                                "required": ["themes"],
                                "additionalProperties": False,
                            }
                        },
                        "required": ["parameters"],
                        "additionalProperties": False,
                    },
                },
            }

            # Call OpenAI API; report failures the same way Steam errors are
            # reported instead of letting them surface as a traceback.
            try:
                response = client.chat.completions.create(
                    model="gpt-4o",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=1,
                    max_tokens=2048,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                    response_format=response_format,
                )
            except OpenAIError as e:
                st.error(f"OpenAI API Error: {e}")
                return None

            message = response.choices[0].message
            response_text = message.content
            if not response_text:
                # The content can be empty, e.g. when the model refuses.
                reason = getattr(message, "refusal", None) or "empty response"
                st.error(f"OpenAI API returned no content: {reason}")
                return None

            # Parse the JSON output
            try:
                response_data = json.loads(response_text)
            except ValueError as ve:
                st.error(f"Error parsing JSON response: {ve}")
                st.error(f"Raw API Response: {response_text}")
                return None

            if not isinstance(response_data, dict):
                st.error(f"Raw API Response: {response_text}")
                return None

            themes_list = response_data.get("parameters", {}).get("themes", [])
            # Fixed columns keep the frame usable even when no theme came back.
            return pd.DataFrame(
                themes_list, columns=["Theme", "Description", "Review IDs"]
            )

        # Extract themes
        themes_df = extract_themes(df)

        if themes_df is not None:
            st.write("### Most Common Themes")
            st.dataframe(themes_df)

            # Display detailed information for each theme
            for theme, description, review_ids in zip(
                themes_df["Theme"],
                themes_df["Description"],
                themes_df["Review IDs"],
            ):
                if not isinstance(review_ids, list):
                    review_ids = []

                # Keep only IDs that exist in the fetched reviews, so the
                # count below always agrees with the table shown.
                matches = df["Review ID"].isin(review_ids)
                reviews_with_theme = df.loc[matches, ["Review ID", "Review"]]

                st.write(f"#### Theme: {theme}")
                st.write(f"**Description:** {description}")
                st.write(f"**Mentioned in {len(reviews_with_theme)} reviews.**")
                st.write("Reviews mentioning this theme:")
                st.dataframe(reviews_with_theme)
        else:
            st.warning("Failed to extract themes.")
    else:
        st.warning("No reviews to display.")
|