AIRider committed
Commit 6ce99f0 · verified · 1 Parent(s): 12e0bbb

Update app.py

Files changed (1):
app.py  +22 -22
app.py CHANGED
@@ -11,7 +11,7 @@ import os
 from huggingface_hub import InferenceClient # Hugging Face Hub API μ‚¬μš©
 
 # 여기에 YouTube API ν‚€λ₯Ό μž…λ ₯ν•˜μ„Έμš”
-YOUTUBE_API_KEY = "AIzaSyA9DEIHCYexeF2gSFW8cF6E3JTu9BhYxLc"
+YOUTUBE_API_KEY = "YOUR_YOUTUBE_API_KEY"
 
 def create_client(model_name):
     token = os.getenv("HF_TOKEN")
@@ -35,13 +35,13 @@ def get_video_stats(video_id):
     comment_count = int(video["statistics"].get("commentCount", 0))
 
     return {
-        "Video ID": video_id,
-        "Title": title,
-        "publishedAt": publish_time,
-        "Channel ID": channel_id,
-        "View Count": view_count,
-        "Like Count": like_count,
-        "Comment Count": comment_count
+        "λ™μ˜μƒ ID": video_id,
+        "제λͺ©": title,
+        "κ²Œμ‹œ μ‹œκ°„": publish_time,
+        "채널 ID": channel_id,
+        "쑰회수": view_count,
+        "μ’‹μ•„μš” 수": like_count,
+        "λŒ“κΈ€ 수": comment_count
     }
 
 def get_channel_stats(channel_id):
@@ -87,9 +87,9 @@ def get_video_data(query, max_results, published_after, published_before):
     video_stats = []
     for video_id in video_ids:
         stats = get_video_stats(video_id)
-        channel_id = stats["Channel ID"]
+        channel_id = stats["채널 ID"]
         subscriber_count = get_channel_stats(channel_id)
-        stats["Subscriber Count"] = subscriber_count
+        stats["κ΅¬λ…μž 수"] = subscriber_count
         video_stats.append(stats)
 
     video_stats_df = pd.DataFrame(video_stats)
@@ -98,39 +98,39 @@ def get_video_data(query, max_results, published_after, published_before):
 def download_csv(df, filename):
     csv = df.to_csv(index=False)
     b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download {filename} CSV</a>'
+    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">λ‹€μš΄λ‘œλ“œ {filename} CSV</a>'
     return href
 
 def visualize_video_ranking(video_stats_df):
-    video_stats_df["Active_Index"] = video_stats_df["View Count"] / video_stats_df["Subscriber Count"]
+    video_stats_df["ν™œμ„± μ§€μˆ˜"] = video_stats_df["쑰회수"] / video_stats_df["κ΅¬λ…μž 수"]
 
     csv_download_link = download_csv(video_stats_df, "video_stats")
 
-    fig = px.bar(video_stats_df, x="Video ID", y="Active_Index", color="View Count",
-                 labels={"Video ID": "Video ID", "Active_Index": "Active_Index"},
-                 title="Video Active Index")
+    fig = px.bar(video_stats_df, x="λ™μ˜μƒ ID", y="ν™œμ„± μ§€μˆ˜", color="쑰회수",
+                 labels={"λ™μ˜μƒ ID": "λ™μ˜μƒ ID", "ν™œμ„± μ§€μˆ˜": "ν™œμ„± μ§€μˆ˜"},
+                 title="λ™μ˜μƒ ν™œμ„± μ§€μˆ˜")
     fig.update_layout(height=500, width=500)
 
     return video_stats_df, fig, csv_download_link
 
 def analyze_titles(video_stats_df, n_clusters=5):
-    titles = video_stats_df['Title'].tolist()
+    titles = video_stats_df['제λͺ©'].tolist()
     vectorizer = TfidfVectorizer()
     tfidf_matrix = vectorizer.fit_transform(titles)
 
     kmeans = KMeans(n_clusters=n_clusters, random_state=42)
     kmeans.fit(tfidf_matrix)
     labels = kmeans.labels_
-    video_stats_df["Cluster"] = labels
+    video_stats_df["ν΄λŸ¬μŠ€ν„°"] = labels
 
     cluster_summaries = []
     for i in range(n_clusters):
-        cluster_titles = video_stats_df[video_stats_df["Cluster"] == i]['Title'].tolist()
+        cluster_titles = video_stats_df[video_stats_df["ν΄λŸ¬μŠ€ν„°"] == i]['제λͺ©'].tolist()
         cluster_text = ' '.join(cluster_titles)
         summary = summarize_cluster(cluster_text, i)
         cluster_summaries.append(summary)
 
-    cluster_summary_df = pd.DataFrame({'Cluster': range(n_clusters), 'Summary': cluster_summaries})
+    cluster_summary_df = pd.DataFrame({'ν΄λŸ¬μŠ€ν„°': range(n_clusters), 'μš”μ•½': cluster_summaries})
     return cluster_summary_df
 
 def summarize_cluster(cluster_text, cluster_num):
@@ -140,7 +140,7 @@ def summarize_cluster(cluster_text, cluster_num):
 
 def main(query, max_results, period, page, n_clusters=5):
     if query:
-        # κΈ°κ°„ μ„€μ •
+        # κΈ°κ°„ μ„€μ •
         now = datetime.utcnow()
         published_before = now.isoformat("T") + "Z"
         if period == "1주일":
@@ -166,10 +166,10 @@ iface = gr.Interface(
     fn=main,
     inputs=[
         gr.components.Textbox(label="검색 쿼리"),
-        gr.components.Textbox(label="μ΅œλŒ€ κ²°κ³Ό 수", type="number"),
+        gr.components.Number(label="μ΅œλŒ€ κ²°κ³Ό 수"),
         gr.components.Dropdown(["1주일", "1κ°œμ›”", "3κ°œμ›”"], label="κΈ°κ°„"),
         gr.components.Dropdown(["Video Ranking", "Title Analysis"], label="νŽ˜μ΄μ§€"),
-        gr.components.Textbox(label="ν΄λŸ¬μŠ€ν„° 수", type="number")
+        gr.components.Number(label="ν΄λŸ¬μŠ€ν„° 수")
     ],
     outputs=[
         gr.components.Dataframe(label="κ²°κ³Ό"),
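Note: the commit only swaps the hard-coded key for the placeholder string "YOUR_YOUTUBE_API_KEY". A minimal alternative sketch (an assumption, not part of this commit) would read the key from an environment variable or Space secret, mirroring the os.getenv("HF_TOKEN") call already used in create_client:

import os

# Hypothetical, not in this commit: load the YouTube key from an environment
# variable / Space secret instead of committing a literal value, falling back
# to the same placeholder the diff introduces.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "YOUR_YOUTUBE_API_KEY")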
 
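Note on the input change: in recent Gradio releases Textbox does not accept type="number" (type is limited to text/password/email), so switching to gr.components.Number is the fix; Number, however, returns a float by default, while KMeans(n_clusters=...) and the YouTube API's maxResults parameter expect integers. A small sketch, assuming Gradio 3+/4, of how the two numeric inputs could be declared so they yield ints directly:

import gradio as gr

# Sketch (assumption, not part of this commit): precision=0 makes Number
# round to the nearest integer and return an int instead of a float.
max_results_input = gr.components.Number(label="μ΅œλŒ€ κ²°κ³Ό 수", precision=0)
n_clusters_input = gr.components.Number(label="ν΄λŸ¬μŠ€ν„° 수", precision=0)

# Alternatively, main() could cast the values itself, e.g.:
#     max_results, n_clusters = int(max_results), int(n_clusters)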