# How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
def YTVideoToText(video_link):
    # importing libraries
    from transformers import pipeline
    from youtube_transcript_api import YouTubeTranscriptApi

    # fetching video transcript
    video_id = video_link.split("=")[1]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # iterating through the transcript and joining all text together
    result = ""
    for i in transcript:
        result += ' ' + i['text']

    # loading the summarization pipeline
    summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")

    # summarizing the transcript in 1000-character slices
    num_iters = int(len(result) / 1000)
    summarized_text = []
    for i in range(0, num_iters + 1):
        start = i * 1000
        end = (i + 1) * 1000
        out = summarizerfb(result[start:end], max_length=130, min_length=30, do_sample=False)
        summarized_text.append(out[0]['summary_text'])

    # returning summary
    return ' '.join(summarized_text)


# How to use: postSummaryWithBart("https://ethereum.org/en/what-is-ethereum/")
def postSummaryWithBart(blog_link):
    # importing libraries
    from transformers import pipeline
    from bs4 import BeautifulSoup
    import requests

    # loading summarization pipeline
    summarizer = pipeline("summarization")

    # getting our blog post
    URL = blog_link
    r = requests.get(URL)
    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all(['h1', 'p'])
    text = [result.text for result in results]
    ARTICLE = ' '.join(text)

    # marking sentence boundaries with end-of-sentence tags
    ARTICLE = ARTICLE.replace('.', '.<eos>')
    ARTICLE = ARTICLE.replace('?', '?<eos>')
    ARTICLE = ARTICLE.replace('!', '!<eos>')
    sentences = ARTICLE.split('<eos>')

    # chunking text into blocks of at most max_chunk words
    max_chunk = 500
    current_chunk = 0
    chunks = []
    for sentence in sentences:
        # checking whether the current chunk already exists
        if len(chunks) == current_chunk + 1:
            if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
                chunks[current_chunk].extend(sentence.split(' '))
            else:
                current_chunk += 1
                chunks.append(sentence.split(' '))
        else:
            # starting a new chunk
            chunks.append(sentence.split(' '))

    for chunk_id in range(len(chunks)):
        chunks[chunk_id] = ' '.join(chunks[chunk_id])

    # summarizing each chunk and joining the partial summaries
    res = summarizer(chunks, max_length=70, min_length=30, do_sample=False)
    text = ' '.join([summ['summary_text'] for summ in res])

    # returning summary
    return text


# How to use: abstractiveSummaryWithPegasus("""Sample text to be summarized""")
def abstractiveSummaryWithPegasus(words):
    # importing & loading model
    from transformers import PegasusForConditionalGeneration, PegasusTokenizer
    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

    # performing summarization
    tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
    summary = model.generate(**tokens)
    actual_summ = tokenizer.decode(summary[0], skip_special_tokens=True)

    # returning summary
    return actual_summ


import gradio as gr

def process(context, question):
    pass  # Implement your question-answering model here...

gr.Interface(fn=process, inputs=["text", "text"], outputs=["textbox", "text"]).launch()
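

# The process() stub above is intentionally left unimplemented. As a minimal
# sketch (an assumption, not part of the original snippet), it can be wired to
# the Hugging Face question-answering pipeline; the model name below is an
# illustrative choice, and two values are returned to match the two outputs.
import gradio as gr
from transformers import pipeline

qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

def process(context, question):
    # running extractive QA over the supplied context and question
    result = qa_pipeline(question=question, context=context)
    # returning the answer and its confidence score for the two output fields
    return result['answer'], str(result['score'])

gr.Interface(fn=process, inputs=["text", "text"], outputs=["textbox", "text"]).launch()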