Update app.py
app.py CHANGED
@@ -11,7 +11,7 @@ import os
 from huggingface_hub import InferenceClient  # uses the Hugging Face Hub API
 
 # Enter your YouTube API key here
-YOUTUBE_API_KEY = "
+YOUTUBE_API_KEY = "YOUR_YOUTUBE_API_KEY"
 
 def create_client(model_name):
     token = os.getenv("HF_TOKEN")
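The change above swaps a hard-coded key for a placeholder. An alternative pattern, sketched here under the assumption that the key is stored as a Space secret / environment variable (the variable name is illustrative, not part of this commit), mirrors how HF_TOKEN is read below:

import os

# Read the key from the environment instead of committing it to app.py;
# "YOUTUBE_API_KEY" is an assumed variable name, with the placeholder as a fallback.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "YOUR_YOUTUBE_API_KEY")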
@@ -35,13 +35,13 @@ def get_video_stats(video_id):
     comment_count = int(video["statistics"].get("commentCount", 0))
 
     return {
-        "
-        "
-        "
-        "
-        "
-        "
-        "
+        "Video ID": video_id,
+        "Title": title,
+        "Publish Time": publish_time,
+        "Channel ID": channel_id,
+        "View Count": view_count,
+        "Like Count": like_count,
+        "Comment Count": comment_count
     }
 
 def get_channel_stats(channel_id):
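The dictionary above presumably summarizes a YouTube Data API videos().list response built earlier in get_video_stats, outside this hunk. A minimal sketch of that lookup, assuming google-api-python-client and a hypothetical helper name:

from googleapiclient.discovery import build

def fetch_video_item(video_id, api_key):
    # Build a YouTube Data API v3 client and request snippet + statistics for one video.
    youtube = build("youtube", "v3", developerKey=api_key)
    response = youtube.videos().list(part="snippet,statistics", id=video_id).execute()
    video = response["items"][0]
    title = video["snippet"]["title"]
    publish_time = video["snippet"]["publishedAt"]
    channel_id = video["snippet"]["channelId"]
    view_count = int(video["statistics"].get("viewCount", 0))
    like_count = int(video["statistics"].get("likeCount", 0))
    return video, title, publish_time, channel_id, view_count, like_count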
@@ -87,9 +87,9 @@ def get_video_data(query, max_results, published_after, published_before):
     video_stats = []
     for video_id in video_ids:
         stats = get_video_stats(video_id)
-        channel_id = stats["
+        channel_id = stats["Channel ID"]
         subscriber_count = get_channel_stats(channel_id)
-        stats["
+        stats["Subscriber Count"] = subscriber_count
         video_stats.append(stats)
 
     video_stats_df = pd.DataFrame(video_stats)
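get_channel_stats, defined outside this hunk, presumably returns the channel's subscriber count. A hedged sketch of such a lookup (helper name and structure are assumptions):

from googleapiclient.discovery import build

def fetch_subscriber_count(channel_id, api_key):
    # channels().list with part="statistics" exposes subscriberCount.
    youtube = build("youtube", "v3", developerKey=api_key)
    response = youtube.channels().list(part="statistics", id=channel_id).execute()
    stats = response["items"][0]["statistics"]
    # Channels can hide their subscriber count, so fall back to 0.
    return int(stats.get("subscriberCount", 0))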
@@ -98,39 +98,39 @@ def get_video_data(query, max_results, published_after, published_before):
 def download_csv(df, filename):
     csv = df.to_csv(index=False)
     b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv"
+    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download {filename} CSV</a>'
     return href
 
 def visualize_video_ranking(video_stats_df):
-    video_stats_df["
+    video_stats_df["Popularity Index"] = video_stats_df["View Count"] / video_stats_df["Subscriber Count"]
 
     csv_download_link = download_csv(video_stats_df, "video_stats")
 
-    fig = px.bar(video_stats_df, x="
-                 labels={"
-                 title="
+    fig = px.bar(video_stats_df, x="Video ID", y="Popularity Index", color="View Count",
+                 labels={"Video ID": "Video ID", "Popularity Index": "Popularity Index"},
+                 title="Video Popularity Index")
     fig.update_layout(height=500, width=500)
 
     return video_stats_df, fig, csv_download_link
 
 def analyze_titles(video_stats_df, n_clusters=5):
-    titles = video_stats_df['
+    titles = video_stats_df['Title'].tolist()
     vectorizer = TfidfVectorizer()
     tfidf_matrix = vectorizer.fit_transform(titles)
 
     kmeans = KMeans(n_clusters=n_clusters, random_state=42)
     kmeans.fit(tfidf_matrix)
     labels = kmeans.labels_
-    video_stats_df["
+    video_stats_df["Cluster"] = labels
 
     cluster_summaries = []
     for i in range(n_clusters):
-        cluster_titles = video_stats_df[video_stats_df["
+        cluster_titles = video_stats_df[video_stats_df["Cluster"] == i]['Title'].tolist()
         cluster_text = ' '.join(cluster_titles)
         summary = summarize_cluster(cluster_text, i)
         cluster_summaries.append(summary)
 
-    cluster_summary_df = pd.DataFrame({'
+    cluster_summary_df = pd.DataFrame({'Cluster': range(n_clusters), 'Summary': cluster_summaries})
     return cluster_summary_df
 
 def summarize_cluster(cluster_text, cluster_num):
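The title analysis above is plain TF-IDF vectorization followed by KMeans. A self-contained toy run of the same idea (the sample titles are invented):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

titles = ["python tutorial for beginners", "learn python in one video",
          "best street food in seoul", "seoul travel vlog"]
tfidf_matrix = TfidfVectorizer().fit_transform(titles)            # sparse title vectors
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10).fit(tfidf_matrix)
print(kmeans.labels_)                                             # one cluster id per title, e.g. [0 0 1 1]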
@@ -140,7 +140,7 @@ def summarize_cluster(cluster_text, cluster_num):
 
 def main(query, max_results, period, page, n_clusters=5):
     if query:
-        #
+        # Set the search period
         now = datetime.utcnow()
         published_before = now.isoformat("T") + "Z"
         if period == "1 week":
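The branches that follow this hunk presumably subtract the selected period to get published_after; a sketch of the one-week case using datetime.timedelta (the surrounding code is not shown in the diff):

from datetime import datetime, timedelta

period = "1 week"                                  # illustrative value
now = datetime.utcnow()
published_before = now.isoformat("T") + "Z"
if period == "1 week":
    # RFC 3339 timestamp one week back, the format publishedAfter/publishedBefore expect
    published_after = (now - timedelta(weeks=1)).isoformat("T") + "Z"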
@@ -166,10 +166,10 @@ iface = gr.Interface(
     fn=main,
     inputs=[
         gr.components.Textbox(label="Search query"),
-        gr.components.
+        gr.components.Number(label="Max results"),
         gr.components.Dropdown(["1 week", "1 month", "3 months"], label="Period"),
         gr.components.Dropdown(["Video Ranking", "Title Analysis"], label="Page"),
-        gr.components.
+        gr.components.Number(label="Number of clusters")
     ],
     outputs=[
         gr.components.Dataframe(label="Results"),
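One detail about the two Number inputs added here: Gradio's Number component typically passes the value to the function as a float, so main presumably casts before use (a sketch; whether the app already does this is not visible in the diff):

def main(query, max_results, period, page, n_clusters=5):
    # Number inputs arrive as floats; YouTube's maxResults and KMeans' n_clusters need ints.
    max_results = int(max_results)
    n_clusters = int(n_clusters)
    ...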