Spaces:

gneya
/

youtube_video_summarizer

Sleeping

File size: 2,239 Bytes

71ab5da

import yt_dlp
from langchain import OpenAI, LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
import whisper
import textwrap
import streamlit as st



# Load variables from a local .env file into the process environment at
# import time, before any of the functions below run.
# NOTE(review): presumably this is what supplies the Groq API key that
# ChatGroq() needs in create_llm() — confirm against the deployment config.
load_dotenv()


async def download_mp4_from_youtube(url):
    """Download a YouTube video to ``abc.mp4`` and then transcribe it.

    Args:
        url: Address of the video; passed unchanged to yt-dlp.

    Returns:
        None. The video is written to ``abc.mp4`` in the working directory
        and ``transcribe()`` is run on it afterwards.

    Note:
        The function is declared ``async`` but awaits nothing: the download
        and the transcription both run synchronously inside the coroutine.
    """
    st.write("Downloading..........")
    # Set the options for the download
    filename = 'abc.mp4'
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': filename,
        'quiet': True,
        # The output name is fixed, so without this yt-dlp would find the
        # file left behind by a previous run, skip the download, and the
        # previous video would be transcribed again.
        'overwrites': True,
    }
    # Download the video file
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.extract_info(url, download=True)
    # Run transcription on the file just downloaded; transcribe() persists
    # its result to text.txt itself, so nothing is printed here.
    transcribe()
        


def transcribe():
    """Transcribe ``abc.mp4`` with Whisper and save the text to ``text.txt``.

    Loads the Whisper "base" model, transcribes the fixed input file
    ``abc.mp4`` and overwrites ``text.txt`` with the resulting text.

    Returns:
        The transcript text, i.e. the same string written to ``text.txt``,
        so callers that use the return value get the transcript rather
        than ``None``.
    """
    st.write("Transcribing.....")
    model = whisper.load_model("base")
    result = model.transcribe("abc.mp4")
    transcript = result['text']
    # No explicit encoding on purpose: create_llm() reads this file back with
    # a plain open(), so both sides must keep using the same platform default.
    with open('text.txt', 'w') as file:
        file.write(transcript)
    return transcript
    

def create_llm(*, max_chunks=4):
    """Summarize the transcript in ``text.txt`` as bullet points.

    Reads ``text.txt``, splits it into chunks of at most 1000 characters,
    builds a "stuff" summarize chain around a ``ChatGroq`` model with a
    bullet-point prompt, and shows the result in the Streamlit page.

    Args:
        max_chunks: Upper bound on how many leading chunks of the transcript
            are handed to the model. The default of 4 keeps the previous
            behaviour (the first four chunks only); pass ``None`` to use
            every chunk.

    Returns:
        The summary text after ``textwrap.fill``, or an empty string when
        the transcript yields no chunks to summarize.
    """
    st.write("Summarizing.....")
    llm = ChatGroq()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=0, separators=[" ", ",", "\n"])
    with open('text.txt') as f:
        text = f.read()
    texts = text_splitter.split_text(text)
    # Only the leading chunks are summarized; everything after the cap is
    # dropped before the chain is built.
    docs = [Document(page_content=t) for t in texts[:max_chunks]]
    if not docs:
        # Nothing to summarize: avoid sending an empty prompt to the model.
        st.write("No transcript text found to summarize.")
        return ""
    prompt_template = """Write a concise bullet point summary of the following:
    {text}
    CONSCISE SUMMARY IN BULLET POINTS:"""
    BULLET_POINT_PROMPT = PromptTemplate(template=prompt_template,
                                         input_variables=["text"])
    chain = load_summarize_chain(llm,
                                 chain_type="stuff",
                                 prompt=BULLET_POINT_PROMPT)
    output_summary = chain.run(docs)
    # replace_whitespace=False keeps the newlines between bullet points; only
    # lines longer than 1000 characters are re-wrapped.
    wrapped_text = textwrap.fill(output_summary,
                                 width=1000,
                                 break_long_words=False,
                                 replace_whitespace=False)
    st.write("Summary of your video:")
    st.write(wrapped_text)
    return wrapped_text