Spaces:
Build error
Build error
| import time | |
| import pprint | |
| import csv | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| from selenium.webdriver.common.by import By | |
| import csv | |
| from youtube_comment_scraper_python import * | |
| import pandas as pd | |
| import plotly.express as px | |
| import re | |
| import streamlit as st | |
# ------------------------------ CHANNEL DATA ------------------------------

# Seconds to pause after each scroll so lazily loaded videos can render.
WAIT_IN_SECONDS = 5
# CSV file the scraped video table is written to and then re-read from.
OUTPUT_CSV = 'output/people.csv'
# Column order of the CSV; matches the keys built in collect_videos().
VIDEO_FIELDS = ('title', 'views', 'thumbnail', 'link')

# Multipliers for abbreviated counts such as "1.2K views".
# NOTE(review): assumes the English YouTube UI -- confirm for other locales.
_VIEW_SUFFIXES = {'': 1, 'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
_VIEW_COUNT_RE = re.compile(r"(\d[\d.,]*)\s*([KMB])?(?![A-Za-z])", re.IGNORECASE)


def channel_name_from_url(url):
    """Return a short display name for the channel that *url* points to.

    The ``@handle`` path segment is preferred.  Without one, the first
    capitalised word in the URL is used (the script's original heuristic),
    and as a last resort the URL itself is returned, so an all-lowercase
    link no longer raises ``IndexError``.
    """
    handle = re.search(r"@([\w.-]+)", url)
    if handle:
        return handle.group(1)
    capitalised = re.search(r"[A-Z]\w+", url)
    return capitalised.group(0) if capitalised else url


def parse_view_count(text):
    """Convert a view label such as ``"1.2K views"`` to an integer.

    Returns ``0`` for ``"No views"`` and ``None`` when *text* is not a string
    (e.g. a NaN read back from the CSV) or contains no recognisable number.
    """
    if not isinstance(text, str):
        return None
    if text.strip().lower().startswith('no '):
        return 0
    match = _VIEW_COUNT_RE.search(text)
    if match is None:
        return None
    # Commas are treated as thousands separators ("1,234" -> "1234").
    digits = match.group(1).replace(',', '').rstrip('.')
    try:
        value = float(digits)
    except ValueError:
        return None
    multiplier = _VIEW_SUFFIXES[(match.group(2) or '').upper()]
    return int(round(value * multiplier))


def scroll_to_bottom(driver, pause=WAIT_IN_SECONDS):
    """Keep scrolling until the document height stops growing."""
    height_script = "return document.documentElement.scrollHeight"
    last_height = driver.execute_script(height_script)
    while True:
        # Scroll to the bottom of the page as currently rendered.
        driver.execute_script("window.scrollTo(0, arguments[0]);", last_height)
        # Wait for new videos to show up.
        time.sleep(pause)
        # An unchanged height means nothing more was loaded.
        new_height = driver.execute_script(height_script)
        if new_height == last_height:
            break
        last_height = new_height


def collect_videos(driver):
    """Return one dict per video (keys: VIDEO_FIELDS) from the loaded page.

    The four element lists are combined with ``zip``, so the result is as
    long as the shortest of them.
    """
    thumbnails = driver.find_elements(By.XPATH, '//a[@id="thumbnail"]/yt-image/img')
    views = driver.find_elements(By.XPATH, '//div[@id="metadata-line"]/span[1]')
    titles = driver.find_elements(By.ID, "video-title")
    links = driver.find_elements(By.ID, "video-title-link")
    return [
        {
            'title': title.text,
            'views': view.text,
            'thumbnail': thumb.get_attribute('src'),
            'link': link.get_attribute('href'),
        }
        for title, view, thumb, link in zip(titles, views, thumbnails, links)
    ]


def scrape_channel(url):
    """Open *url* in Chrome, load every video and return the scraped rows.

    The browser is always closed again, even when scraping fails, so that
    repeated Streamlit reruns do not pile up Chrome processes.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get(url)
        scroll_to_bottom(driver)
        # Element text/attributes must be read before the driver is closed.
        return collect_videos(driver)
    finally:
        driver.quit()


def save_videos_csv(videos, path=OUTPUT_CSV):
    """Write *videos* (a list of dicts keyed by VIDEO_FIELDS) to *path*."""
    import os  # local import: only needed to create the output directory

    directory = os.path.dirname(path)
    if directory:
        # The original script failed with FileNotFoundError when the
        # directory had not been created beforehand.
        os.makedirs(directory, exist_ok=True)
    with open(path, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=VIDEO_FIELDS)
        dict_writer.writeheader()
        dict_writer.writerows(videos)


def main():
    """Render the Streamlit page: ask for a channel link, scrape it, plot it."""
    st.title('Youtube Channel Analysis')
    st.write('Youtube WebScrap')

    url = st.text_input('Paste the Youtube Channel Link', "")
    if not url:
        st.warning('Please input a Link.')
        st.stop()
    st.success('Thank you for inputting a link.')
    # Example link: https://www.youtube.com/@YasoobKhalid/videos

    st.write('Getting Data from', channel_name_from_url(url), 'channel')

    # Streamlit re-executes the whole script on every widget interaction.
    # Remember the rows per URL for this session so that moving the slider
    # does not start a new browser and scrape the channel again.
    if 'videos_by_url' not in st.session_state:
        st.session_state['videos_by_url'] = {}
    cache = st.session_state['videos_by_url']
    videos = cache.get(url)
    if not videos:
        # Empty results are retried rather than served from the cache.
        videos = scrape_channel(url)
        cache[url] = videos
        print(videos)

    if not videos:
        st.warning('No videos were found at this link.')
        st.stop()

    save_videos_csv(videos)
    df = pd.read_csv(OUTPUT_CSV)
    st.dataframe(df)

    # The slider range follows the number of scraped videos instead of a
    # constant that only fitted one particular channel.
    total = max(len(df), 1)
    count = st.slider('Select Lower Video Count', 0, total, min(100, total))
    st.write("You selected", count, 'Videos')
    # NOTE(review): `count` is only displayed; nothing is filtered by it yet.

    # Plot numeric view counts; the raw labels ("1.2K views") are strings and
    # would be laid out as unordered categories on the y axis.
    plot_df = df.assign(
        view_count=pd.to_numeric(df['views'].map(parse_view_count), errors='coerce')
    )
    fig = px.bar(
        plot_df,
        x="title",
        y="view_count",
        labels={"view_count": "views"},
        height=600,
    )
    fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

    tab1, tab2 = st.tabs(["Streamlit theme (default)", "Plotly native theme"])
    with tab1:
        # Use the Streamlit theme.
        # This is the default. So you can also omit the theme argument.
        st.plotly_chart(fig, theme="streamlit", use_container_width=True)
    with tab2:
        # Use the native Plotly theme.
        st.plotly_chart(fig, theme=None, use_container_width=True)


# `streamlit run` executes the script as "__main__", so the page is rendered
# as before, while importing this module no longer launches a browser.
if __name__ == "__main__":
    main()
# ----------------------------------------------------------------------------COMMENTS------------------------------------------------------------------------------
# url = input('Enter Youtube Video Url- ')
# youtube.open(url)
# youtube.keypress("pagedown")
# data = []
# currentpagesource=youtube.get_page_source()
# lastpagesource=''
# while(True):
#     if(lastpagesource==currentpagesource):
#         break
#     lastpagesource=currentpagesource
#     response=youtube.video_comments()
#     for c in response['body']:
#         data.append(c)
#     youtube.scroll()
#     currentpagesource=youtube.get_page_source()
# df = pd.DataFrame(data)
# df = df.replace('\n',' ', regex=True)
# df = df[['Comment', 'Likes']].drop_duplicates(keep="first")
# # df = df[['Likes']].drop_duplicates(keep="first")
# df.to_csv('output/data.csv',index=False)
# df.head()