Update app.py
app.py CHANGED
@@ -11,7 +11,7 @@ import os
 from huggingface_hub import InferenceClient  # uses the Hugging Face Hub API
 
 # Enter your YouTube API key here
-YOUTUBE_API_KEY = "
+YOUTUBE_API_KEY = "YOUR_YOUTUBE_API_KEY"
 
 def create_client(model_name):
     token = os.getenv("HF_TOKEN")
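The change above swaps a hard-coded key for a placeholder. An alternative pattern, sketched here under the assumption that the key is stored as a Space secret / environment variable (the variable name is illustrative, not part of this commit), mirrors how HF_TOKEN is read below:

import os

# Read the key from the environment instead of committing it to app.py;
# "YOUTUBE_API_KEY" is an assumed variable name, with the placeholder as a fallback.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "YOUR_YOUTUBE_API_KEY")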
@@ -35,13 +35,13 @@ def get_video_stats(video_id):
     comment_count = int(video["statistics"].get("commentCount", 0))
 
     return {
-        "
-        "
-        "
-        "
-        "
-        "
-        "
+        "Video ID": video_id,
+        "Title": title,
+        "Publish Time": publish_time,
+        "Channel ID": channel_id,
+        "View Count": view_count,
+        "Like Count": like_count,
+        "Comment Count": comment_count
     }
 
 def get_channel_stats(channel_id):
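The dictionary above presumably summarizes a YouTube Data API videos().list response built earlier in get_video_stats, outside this hunk. A minimal sketch of that lookup, assuming google-api-python-client and a hypothetical helper name:

from googleapiclient.discovery import build

def fetch_video_item(video_id, api_key):
    # Build a YouTube Data API v3 client and request snippet + statistics for one video.
    youtube = build("youtube", "v3", developerKey=api_key)
    response = youtube.videos().list(part="snippet,statistics", id=video_id).execute()
    video = response["items"][0]
    title = video["snippet"]["title"]
    publish_time = video["snippet"]["publishedAt"]
    channel_id = video["snippet"]["channelId"]
    view_count = int(video["statistics"].get("viewCount", 0))
    like_count = int(video["statistics"].get("likeCount", 0))
    return video, title, publish_time, channel_id, view_count, like_count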
@@ -87,9 +87,9 @@ def get_video_data(query, max_results, published_after, published_before):
     video_stats = []
     for video_id in video_ids:
         stats = get_video_stats(video_id)
-        channel_id = stats["
+        channel_id = stats["Channel ID"]
         subscriber_count = get_channel_stats(channel_id)
-        stats["
+        stats["Subscriber Count"] = subscriber_count
         video_stats.append(stats)
 
     video_stats_df = pd.DataFrame(video_stats)
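get_channel_stats, defined outside this hunk, presumably returns the channel's subscriber count. A hedged sketch of such a lookup (helper name and structure are assumptions):

from googleapiclient.discovery import build

def fetch_subscriber_count(channel_id, api_key):
    # channels().list with part="statistics" exposes subscriberCount.
    youtube = build("youtube", "v3", developerKey=api_key)
    response = youtube.channels().list(part="statistics", id=channel_id).execute()
    stats = response["items"][0]["statistics"]
    # Channels can hide their subscriber count, so fall back to 0.
    return int(stats.get("subscriberCount", 0))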
@@ -98,39 +98,39 @@ def get_video_data(query, max_results, published_after, published_before):
 def download_csv(df, filename):
     csv = df.to_csv(index=False)
     b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv"
+    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download {filename} CSV</a>'
     return href
 
 def visualize_video_ranking(video_stats_df):
-    video_stats_df["
+    video_stats_df["Popularity Index"] = video_stats_df["View Count"] / video_stats_df["Subscriber Count"]
 
     csv_download_link = download_csv(video_stats_df, "video_stats")
 
-    fig = px.bar(video_stats_df, x="
-                 labels={"
-                 title="
+    fig = px.bar(video_stats_df, x="Video ID", y="Popularity Index", color="View Count",
+                 labels={"Video ID": "Video ID", "Popularity Index": "Popularity Index"},
+                 title="Video Popularity Index")
     fig.update_layout(height=500, width=500)
 
     return video_stats_df, fig, csv_download_link
 
 def analyze_titles(video_stats_df, n_clusters=5):
-    titles = video_stats_df['
+    titles = video_stats_df['Title'].tolist()
     vectorizer = TfidfVectorizer()
     tfidf_matrix = vectorizer.fit_transform(titles)
 
     kmeans = KMeans(n_clusters=n_clusters, random_state=42)
     kmeans.fit(tfidf_matrix)
     labels = kmeans.labels_
-    video_stats_df["
+    video_stats_df["Cluster"] = labels
 
     cluster_summaries = []
     for i in range(n_clusters):
-        cluster_titles = video_stats_df[video_stats_df["
+        cluster_titles = video_stats_df[video_stats_df["Cluster"] == i]['Title'].tolist()
         cluster_text = ' '.join(cluster_titles)
         summary = summarize_cluster(cluster_text, i)
         cluster_summaries.append(summary)
 
-    cluster_summary_df = pd.DataFrame({'
+    cluster_summary_df = pd.DataFrame({'Cluster': range(n_clusters), 'Summary': cluster_summaries})
     return cluster_summary_df
 
 def summarize_cluster(cluster_text, cluster_num):
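The title analysis above is plain TF-IDF vectorization followed by KMeans. A self-contained toy run of the same idea (the sample titles are invented):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

titles = ["python tutorial for beginners", "learn python in one video",
          "best street food in seoul", "seoul travel vlog"]
tfidf_matrix = TfidfVectorizer().fit_transform(titles)            # sparse title vectors
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10).fit(tfidf_matrix)
print(kmeans.labels_)                                             # one cluster id per title, e.g. [0 0 1 1]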
@@ -140,7 +140,7 @@ def summarize_cluster(cluster_text, cluster_num):
 
 def main(query, max_results, period, page, n_clusters=5):
     if query:
-        #
+        # Set the search period
         now = datetime.utcnow()
         published_before = now.isoformat("T") + "Z"
         if period == "1 week":
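The branches that follow this hunk presumably subtract the selected period to get published_after; a sketch of the one-week case using datetime.timedelta (the surrounding code is not shown in the diff):

from datetime import datetime, timedelta

period = "1 week"                                  # illustrative value
now = datetime.utcnow()
published_before = now.isoformat("T") + "Z"
if period == "1 week":
    # RFC 3339 timestamp one week back, the format publishedAfter/publishedBefore expect
    published_after = (now - timedelta(weeks=1)).isoformat("T") + "Z"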
@@ -166,10 +166,10 @@ iface = gr.Interface(
     fn=main,
     inputs=[
         gr.components.Textbox(label="Search query"),
-        gr.components.
+        gr.components.Number(label="Max results"),
         gr.components.Dropdown(["1 week", "1 month", "3 months"], label="Period"),
         gr.components.Dropdown(["Video Ranking", "Title Analysis"], label="Page"),
-        gr.components.
+        gr.components.Number(label="Number of clusters")
     ],
     outputs=[
         gr.components.Dataframe(label="Results"),
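One detail about the two Number inputs added here: Gradio's Number component typically passes the value to the function as a float, so main presumably casts before use (a sketch; whether the app already does this is not visible in the diff):

def main(query, max_results, period, page, n_clusters=5):
    # Number inputs arrive as floats; YouTube's maxResults and KMeans' n_clusters need ints.
    max_results = int(max_results)
    n_clusters = int(n_clusters)
    ...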