AIRider committed on
Commit
64db1cd
·
verified ·
1 Parent(s): 671c386

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -0
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from googleapiclient.discovery import build
4
+ import plotly.express as px
5
+ import base64
6
+ import numpy as np
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.cluster import KMeans
9
+ from datetime import datetime, timedelta
10
+ import os
11
+ from transformers import InferenceClient # ๊ฐ€์ •: transformers ๋ชจ๋“ˆ ์‚ฌ์šฉ
12
+
13
def create_client(model_name):
    """Build an inference client bound to ``model_name``.

    The access token is read from the ``HF_TOKEN`` environment variable;
    when the variable is unset, ``None`` is passed as the token.
    """
    # NOTE(review): this file imports InferenceClient from `transformers`;
    # that class is normally shipped by `huggingface_hub` - confirm the import.
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_name, token=hf_token)


# Module-level client, created once at import time and used for summaries.
client = create_client("CohereForAI/c4ai-command-r-plus")
17
+
18
def get_video_stats(api_key, video_id):
    """Fetch title, channel, publish time and counters for a single video.

    Args:
        api_key: YouTube Data API developer key.
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys ``"Video ID"``, ``"Title"``, ``"publishedAt"``,
        ``"Channel ID"``, ``"View Count"``, ``"Like Count"`` and
        ``"Comment Count"``. Counters missing from the response are
        reported as 0.

    Raises:
        IndexError: if the API response contains no item for ``video_id``.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    response = (
        youtube.videos()
        .list(part="snippet,statistics", id=video_id)
        .execute()
    )

    items = response.get("items") or []
    if not items:
        # Same exception type as the bare `items[0]` lookup would raise,
        # but with a message that identifies the offending video.
        raise IndexError(f"No video found for id {video_id!r}")

    snippet = items[0]["snippet"]
    statistics = items[0].get("statistics", {})

    return {
        "Video ID": video_id,
        "Title": snippet["title"],
        "publishedAt": snippet["publishedAt"],
        "Channel ID": snippet["channelId"],
        "View Count": int(statistics.get("viewCount", 0)),
        "Like Count": int(statistics.get("likeCount", 0)),
        "Comment Count": int(statistics.get("commentCount", 0)),
    }
42
+
43
def get_channel_stats(api_key, channel_id):
    """Return the subscriber count of a channel.

    Args:
        api_key: YouTube Data API developer key.
        channel_id: ID of the channel to look up.

    Returns:
        The subscriber count as an int, or 0 when the channel is not found
        or the response carries no ``subscriberCount`` value.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    response = (
        youtube.channels()
        .list(part="statistics", id=channel_id)
        .execute()
    )

    items = response.get("items") or []
    if not items:
        return 0

    # `subscriberCount` may be absent (e.g. channels that hide the figure);
    # fall back to 0 instead of raising KeyError.
    statistics = items[0].get("statistics", {})
    return int(statistics.get("subscriberCount", 0))
57
+
58
def get_video_data(api_key, query, max_results, published_after, published_before):
    """Search videos for ``query`` and collect per-video statistics.

    Results are requested ordered by view count and restricted to the
    ``published_after`` / ``published_before`` window.

    Args:
        api_key: YouTube Data API developer key.
        query: Search terms.
        max_results: Maximum number of videos to return; converted to int
            because UI widgets may deliver a float.
        published_after: Lower bound timestamp string passed to the API.
        published_before: Upper bound timestamp string passed to the API.

    Returns:
        A ``pandas.DataFrame`` with one row per video: the fields produced by
        ``get_video_stats`` plus a ``"Subscriber Count"`` column. The frame
        is empty (no columns) when the search yields nothing.
    """
    max_results = int(max_results)
    youtube = build("youtube", "v3", developerKey=api_key)

    video_ids = []
    next_page_token = None
    while len(video_ids) < max_results:
        remaining = max_results - len(video_ids)
        search_response = youtube.search().list(
            q=query,
            type="video",
            part="id",
            # Never request more than is still needed (API page cap is 50).
            maxResults=min(50, remaining),
            pageToken=next_page_token,
            order="viewCount",
            publishedAfter=published_after,
            publishedBefore=published_before,
        ).execute()

        video_ids.extend(
            item["id"]["videoId"] for item in search_response.get("items", [])
        )
        next_page_token = search_response.get("nextPageToken")
        if not next_page_token:
            break

    video_ids = video_ids[:max_results]

    # Several videos often share a channel; look each channel up only once.
    subscribers_by_channel = {}
    video_stats = []
    for video_id in video_ids:
        stats = get_video_stats(api_key, video_id)
        channel_id = stats["Channel ID"]
        if channel_id not in subscribers_by_channel:
            subscribers_by_channel[channel_id] = get_channel_stats(api_key, channel_id)
        stats["Subscriber Count"] = subscribers_by_channel[channel_id]
        video_stats.append(stats)

    return pd.DataFrame(video_stats)
93
+
94
def download_csv(df, filename):
    """Render ``df`` as an HTML download link with the CSV embedded inline.

    The frame is serialised without its index, base64-encoded and placed in
    a ``data:`` URI, so no file is written to disk.

    Args:
        df: ``pandas.DataFrame`` to export.
        filename: Base name (without extension) used for the download and
            for the link text. It is HTML-escaped before interpolation.

    Returns:
        An ``<a>`` element string whose ``href`` holds the CSV payload.
    """
    import html

    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    # Escape quotes and angle brackets so the name cannot break out of the
    # attribute or inject markup.
    safe_name = html.escape(str(filename), quote=True)
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        f'download="{safe_name}.csv">Download {safe_name} CSV</a>'
    )
99
+
100
def visualize_video_ranking(video_stats_df):
    """Compute the view-to-subscriber ratio per video and chart it.

    Adds an ``"Active_Index"`` column (``View Count / Subscriber Count``) to
    ``video_stats_df`` in place. Rows whose subscriber count is 0 get an
    index of 0.0 instead of an infinite or NaN value, so the table, chart
    and CSV stay finite.

    Args:
        video_stats_df: Frame with ``"Video ID"``, ``"View Count"`` and
            ``"Subscriber Count"`` columns.

    Returns:
        A tuple ``(video_stats_df, fig, csv_download_link)``: the augmented
        frame, a bar chart of the index per video, and an HTML link that
        downloads the frame as CSV.
    """
    # Mask zero subscriber counts so the division cannot produce inf.
    subscribers = video_stats_df["Subscriber Count"].replace(0, np.nan)
    video_stats_df["Active_Index"] = (
        video_stats_df["View Count"] / subscribers
    ).fillna(0.0)

    csv_download_link = download_csv(video_stats_df, "video_stats")

    fig = px.bar(
        video_stats_df,
        x="Video ID",
        y="Active_Index",
        color="View Count",
        labels={"Video ID": "Video ID", "Active_Index": "Active_Index"},
        title="Video Active Index",
    )
    fig.update_layout(height=500, width=500)

    return video_stats_df, fig, csv_download_link
111
+
112
def analyze_titles(video_stats_df, n_clusters=5):
    """Cluster video titles with TF-IDF + k-means and summarise each cluster.

    A ``"Cluster"`` column holding each title's cluster label is written to
    ``video_stats_df`` in place.

    Args:
        video_stats_df: Frame with a ``"Title"`` column.
        n_clusters: Requested number of clusters. Converted to int and capped
            at the number of titles, since k-means cannot build more clusters
            than there are samples.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Cluster"`` and
        ``"Summary"``, one row per cluster actually built. The frame has no
        rows when there are no titles to analyse.
    """
    titles = video_stats_df["Title"].tolist() if "Title" in video_stats_df else []
    if not titles:
        return pd.DataFrame({"Cluster": [], "Summary": []})

    n_clusters = max(1, min(int(n_clusters), len(titles)))

    tfidf_matrix = TfidfVectorizer().fit_transform(titles)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(tfidf_matrix)
    video_stats_df["Cluster"] = kmeans.labels_

    cluster_summaries = []
    for cluster_id in range(n_clusters):
        in_cluster = video_stats_df["Cluster"] == cluster_id
        cluster_text = " ".join(video_stats_df.loc[in_cluster, "Title"].tolist())
        cluster_summaries.append(summarize_cluster(cluster_text, cluster_id))

    return pd.DataFrame(
        {"Cluster": range(n_clusters), "Summary": cluster_summaries}
    )
131
+
132
def summarize_cluster(cluster_text, cluster_num):
    """Ask the module-level inference client to summarise a cluster of titles.

    Args:
        cluster_text: The cluster's titles joined into one string.
        cluster_num: Index of the cluster; accepted for the caller's
            convenience and not used in the prompt.

    Returns:
        The generated summary with surrounding whitespace removed, or an
        empty string when ``cluster_text`` is blank (no request is sent).
    """
    if not cluster_text.strip():
        return ""

    prompt = f"๋‹ค์Œ ๋™์˜์ƒ์„ ๋ถ„์„ํ•˜์—ฌ ์š”์•ฝํ•˜๊ณ , 500์ž ์ด๋‚ด๋กœ ๋™์˜์ƒ์˜ ํŠน์ง• ๋ฐ ์ธ๊ธฐ ์š”์ธ์„ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”: {cluster_text}"
    # InferenceClient exposes `text_generation` (there is no `generate`), and
    # by default it returns the generated text as a plain string.
    # NOTE(review): confirm the configured model is served for text generation.
    generated = client.text_generation(prompt, max_new_tokens=500)
    return generated.strip()
139
+
140
def main(api_key, query, max_results, period, page, n_clusters=5):
    """Interface callback: run a search and build the requested page.

    Args:
        api_key: YouTube Data API developer key.
        query: Search terms; an empty query produces empty outputs.
        max_results: Maximum number of videos to analyse (cast to int).
        period: Look-back window label chosen in the UI; unknown or missing
            labels fall back to 30 days.
        page: ``"Video Ranking"`` or ``"Title Analysis"``.
        n_clusters: Number of title clusters for the analysis page (cast to
            int).

    Returns:
        Always a 3-tuple ``(dataframe, figure, html)`` matching the three
        interface outputs. Slots that do not apply are ``None``.
    """
    from datetime import timezone

    no_output = (None, None, None)
    if not query:
        return no_output

    # Look-back window in days per UI label; default is one month.
    days_by_period = {"1์ฃผ์ผ": 7, "1๊ฐœ์›”": 30, "3๊ฐœ์›”": 90}
    lookback = timedelta(days=days_by_period.get(period, 30))

    # Timezone-aware "now" (utcnow() is deprecated), formatted as a UTC
    # timestamp with a trailing "Z".
    now = datetime.now(timezone.utc)
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    published_before = now.strftime(timestamp_format)
    published_after = (now - lookback).strftime(timestamp_format)

    video_stats_df = get_video_data(
        api_key, query, int(max_results), published_after, published_before
    )

    # Nothing found: both pages need columns that an empty frame lacks.
    if video_stats_df.empty:
        return video_stats_df, None, None

    if page == "Video Ranking":
        ranked_df, fig, csv_download_link = visualize_video_ranking(video_stats_df)
        return ranked_df, fig, csv_download_link

    if page == "Title Analysis":
        cluster_summary_df = analyze_titles(video_stats_df, int(n_clusters))
        return cluster_summary_df, None, None

    return no_output
163
+
164
# Gradio UI: six inputs feed `main`, which fills a table, a plot and an HTML
# download link. Both sliders use a whole-number step so the callback
# receives integral counts rather than fractional values.
iface = gr.Interface(
    fn=main,
    inputs=[
        gr.components.Textbox(label="YouTube API Key๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", type="password"),
        gr.components.Textbox(label="๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ"),
        gr.components.Slider(minimum=1, maximum=1000, value=5, step=1, label="์ตœ๋Œ€ ๊ฒฐ๊ณผ ์ˆ˜"),
        gr.components.Dropdown(["1์ฃผ์ผ", "1๊ฐœ์›”", "3๊ฐœ์›”"], label="๊ธฐ๊ฐ„"),
        gr.components.Dropdown(["Video Ranking", "Title Analysis"], label="ํŽ˜์ด์ง€"),
        gr.components.Slider(minimum=2, maximum=10, value=5, step=1, label="ํด๋Ÿฌ์Šคํ„ฐ ์ˆ˜"),
    ],
    outputs=[
        gr.components.Dataframe(label="๊ฒฐ๊ณผ"),
        gr.components.Plot(label="๊ทธ๋ž˜ํ”„"),
        gr.components.HTML(label="CSV ๋‹ค์šด๋กœ๋“œ ๋งํฌ"),
    ],
    live=False,
    title="YouTube ๋ถ„์„ ๋„๊ตฌ",
)

if __name__ == "__main__":
    iface.launch()