Podcastify

Running

File size: 6,206 Bytes

5fe16b1

import gradio as gr
import spaces
import os, torch, io
import json
import re
os.system("python -m unidic download")
import httpx
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    BitsAndBytesConfig,
)
from threading import Thread

from gradio_client import Client

# Remote-inference alternative, currently disabled:
# client = Client("eswardivi/AIO_Chat")

# Hugging Face repository that provides both the language model and its tokenizer.
_MODEL_ID = "NousResearch/Hermes-2-Pro-Llama-3-8B"

# Load the weights 4-bit quantized, with float16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    quantization_config=quantization_config,
)

# The tokenizer is pinned to a fixed repository revision; the model is not.
tok = AutoTokenizer.from_pretrained(
    _MODEL_ID,
    revision="8ab73a6800796d84448bc936db9bac5ad9f984ae",
)

# Token ids passed to generation as end-of-sequence markers: the tokenizer's
# own EOS id plus the id of the "<|eot_id|>" token.
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]
def validate_url(url):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Failures are never raised. They are returned as a human-readable
    string instead, and every such string begins with "Error" so that a
    caller testing for that word can reliably tell a failure apart from
    page content. (Previously only the HTTP-status message contained a
    capitalised "Error"; transport and unexpected failures did not.)

    Args:
        url: Absolute URL to request.

    Returns:
        The decoded response body on success, otherwise a message that
        starts with "Error".
    """
    try:
        response = httpx.get(url, timeout=60.0)
        # Turn 4xx/5xx responses into httpx.HTTPStatusError.
        response.raise_for_status()
        return response.text
    except httpx.HTTPStatusError as e:
        return f"Error response {e.response.status_code} while requesting {url}"
    except httpx.RequestError as e:
        # Transport-level problems: DNS, connection, timeout, ...
        return f"Error: request to {url} failed: {e}"
    except Exception as e:
        # Top-level boundary: report anything else as text, like the cases above.
        return f"Error: unexpected failure while requesting {url}: {e}"

def fetch_text(url):
    """Fetch the page at *url* through the ``https://r.jina.ai/`` reader prefix.

    The target URL is appended verbatim to the reader prefix and the
    combined URL is requested via ``validate_url``.

    Args:
        url: Absolute URL of the article to extract.

    Returns:
        Whatever ``validate_url`` returns for the combined URL: the
        response text on success, or a failure description string.
    """
    print("Entered Webpage Extraction")
    full_url = "https://r.jina.ai/" + url
    print(full_url)
    page_text = validate_url(full_url)
    # Log the exit only once the request has actually completed, so the
    # trace brackets the network call instead of preceding it.
    print("Exited Webpage Extraction")
    return page_text
    
# Instruction appended to the article text; asks the model for a JSON-only reply.
_PODCAST_PROMPT = """\n Convert the provided text into a short, informative podcast conversation between two experts. The tone should be professional and engaging. Please adhere to the following format and return only JSON:
    {
        "conversation": [
            {"speaker": "", "text": ""},
            {"speaker": "", "text": ""}
        ]
    }
    """

# fetch_text reports failures as ordinary strings. Match on how those strings
# begin rather than searching the whole article for the word "Error", which
# both misses lower-case "error" messages and rejects articles that merely
# mention the word.
_FETCH_FAILURE_PREFIXES = (
    "Error",
    "An error occurred",
    "An unexpected error occurred",
)

# Voices used for alternating turns of the conversation.
_VOICES = ("EN-Default", "EN-US")


def _extract_conversation(generated):
    """Find the first JSON object in *generated* holding a "conversation" list.

    Returns:
        ``(json_text, turns)`` where ``json_text`` is the exact substring
        that was parsed and ``turns`` is the list under "conversation";
        ``(None, [])`` when no such object can be decoded.
    """
    decoder = json.JSONDecoder()
    start = generated.find("{")
    while start != -1:
        try:
            # raw_decode parses one complete JSON value starting at `start`
            # and ignores trailing text, so arbitrary nesting and braces
            # inside string values are handled correctly.
            payload, end = decoder.raw_decode(generated, start)
        except json.JSONDecodeError:
            payload = None
        if isinstance(payload, dict) and isinstance(payload.get("conversation"), list):
            return generated[start:end], payload["conversation"]
        start = generated.find("{", start + 1)
    return None, []


@spaces.GPU(duration=100)
def synthesize(article_url, progress_audio=gr.Progress()):
    """Turn the article at *article_url* into a two-voice podcast MP3.

    Steps: fetch the article text, ask the language model for a JSON
    conversation, synthesize each turn with MeloTTS using alternating
    voices, and export the concatenated audio.

    Args:
        article_url: Article address; must start with "http://" or "https://".
        progress_audio: Gradio progress tracker whose ``tqdm`` is handed
            to the TTS engine.

    Returns:
        ``(conversation_json, mp3_path)`` on success, or
        ``(message, None)`` when the URL is invalid, the fetch fails, or
        the model output cannot be turned into speech.
    """
    article_url = (article_url or "").strip()
    if not article_url.startswith(("http://", "https://")):
        return "URL must start with 'http://' or 'https://'", None

    article_text = fetch_text(article_url)
    if article_text.startswith(_FETCH_FAILURE_PREFIXES):
        return article_text, None

    device = "cuda" if torch.cuda.is_available() else "cpu"

    chat = [{"role": "user", "content": article_text + _PODCAST_PROMPT}]
    prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tok([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )

    print("Entered Generation")
    # Generation runs in a worker thread; this thread drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    generated = "".join(streamer)
    # Do not leave the worker dangling once the stream is exhausted.
    worker.join()
    print("Exited Generation")
    print(generated)

    conversation, turns = _extract_conversation(generated)
    if conversation is None:
        # Sampling or the token limit can yield missing/truncated JSON;
        # report it instead of crashing or voicing an empty template.
        return "The model did not return a valid conversation. Please try again.", None
    print(conversation)

    speed = 1.0
    tts = TTS(language="EN", device=device)
    spk2id = tts.hps.data.spk2id
    combined_audio = AudioSegment.empty()
    spoken_turns = 0

    for i, turn in enumerate(turns):
        line = turn.get("text") if isinstance(turn, dict) else None
        if not isinstance(line, str) or not line.strip():
            # Skip malformed or empty turns rather than failing the whole run.
            continue
        bio = io.BytesIO()
        # Voices alternate by the turn's position in the conversation.
        speaker_id = spk2id[_VOICES[i % 2]]
        tts.tts_to_file(
            line, speaker_id, bio, speed=speed, pbar=progress_audio.tqdm, format="wav"
        )
        bio.seek(0)
        combined_audio += AudioSegment.from_file(bio, format="wav")
        spoken_turns += 1

    if not spoken_turns:
        return "The generated conversation contained no text to narrate. Please try again.", None

    # The app queue allows several requests to run at once, so each call
    # writes to its own file; a fixed name would let runs overwrite each other.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out_file:
        final_audio_path = out_file.name
    combined_audio.export(final_audio_path, format="mp3")
    return conversation, final_audio_path


# Gradio UI. Components are declared in display order: heading and
# instructions, the URL input with its button, the two outputs, then credits.
with gr.Blocks(theme='gstaff/sketch') as demo:
    # Title, subtitle and step-by-step usage instructions.
    gr.Markdown("# Turn Any Article into a Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio podcasts.")
    gr.Markdown("### Instructions")
    gr.Markdown("""
    - **Step 1:** Paste the URL of the article you want to convert into the textbox.
    - **Step 2:** Click on "Podcastify" to generate the podcast.
    - **Step 3:** Listen to the podcast or view the conversation.
    """)
    gr.Markdown("""
    - View the code at [GitHub - NarrateIt](https://github.com/EswarDivi/NarrateIt).
    """)
    # Input: article URL plus the button that starts the conversion.
    with gr.Group():
        text = gr.Textbox(label="Article Link")
        btn = gr.Button("Podcastify", variant="primary")
    # Outputs: the generated conversation text and the audio player, side by side.
    with gr.Row():
        conv_display = gr.Textbox(label="Conversation", interactive=False)
        aud = gr.Audio(interactive=False)
    # synthesize returns a (text, audio path) pair, mapped onto the two outputs in order.
    btn.click(synthesize, inputs=[text], outputs=[conv_display, aud])
    # Credits.
    gr.Markdown("""
    Special thanks to:

    - [gstaff/sketch](https://huggingface.co/spaces/gstaff/sketch) for the Sketch Theme.
    - [mrfakename/MeloTTS](https://huggingface.co/spaces/mrfakename/MeloTTS) and [GitHub](https://github.com/myshell-ai/MeloTTS) for MeloTTS.
    - [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) for Function Calling Support.
    - [Jina AI](https://jina.ai/reader/) for the web page parsing.
    """)
# Enable the request queue (open API, default concurrency limit of 10) and
# launch the app with the API page shown and a share link requested.
demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True,share=True)