Spaces:
Running
Running
File size: 6,206 Bytes
5fe16b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
import gradio as gr
import spaces
import os, torch, io
import json
import re
os.system("python -m unidic download")
import httpx
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
BitsAndBytesConfig,
)
from threading import Thread
from gradio_client import Client
# client = Client("eswardivi/AIO_Chat")
# Hugging Face repo id shared by the chat model and its tokenizer.
MODEL_ID = "NousResearch/Hermes-2-Pro-Llama-3-8B"

# Load the weights in 4-bit and run the compute in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# NOTE(review): the model is loaded without a `revision`, while the tokenizer
# below is pinned to a specific commit -- confirm whether that is intended.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
)
tok = AutoTokenizer.from_pretrained(
    MODEL_ID,
    revision="8ab73a6800796d84448bc936db9bac5ad9f984ae",
)

# Token ids passed to `generate` as `eos_token_id`: the tokenizer's EOS token
# plus the id of the "<|eot_id|>" token.
terminators = [
    tok.eos_token_id,
    tok.convert_tokens_to_ids("<|eot_id|>"),
]
def validate_url(url):
    """Fetch *url* with a GET request and return the body as text.

    This function does not raise: every failure is converted into a
    human-readable message that is returned in place of the page text, so
    callers must inspect the returned string to tell success from failure.

    Args:
        url: Address to request (60 second timeout).

    Returns:
        The response text when the request succeeds with a non-error status,
        otherwise one of the error messages built below.
    """
    try:
        reply = httpx.get(url, timeout=60.0)
        reply.raise_for_status()
        outcome = reply.text
    except httpx.RequestError as exc:
        # The request itself failed.
        outcome = f"An error occurred while requesting {url}: {exc}"
    except httpx.HTTPStatusError as exc:
        # Raised by raise_for_status() for an error status code.
        outcome = f"Error response {exc.response.status_code} while requesting {url}"
    except Exception as exc:
        outcome = f"An unexpected error occurred: {exc}"
    return outcome
def fetch_text(url):
    """Return the text of the page at *url*, fetched through r.jina.ai.

    The article URL is appended to the ``https://r.jina.ai/`` prefix and the
    resulting address is requested with ``validate_url``.

    Args:
        url: Address of the article to extract.

    Returns:
        Whatever ``validate_url`` returns for the prefixed URL: the extracted
        page text, or an error message string when the request failed.
    """
    print("Entered Webpage Extraction")
    full_url = "https://r.jina.ai/" + url
    print(full_url)
    page_text = validate_url(full_url)
    # Log the exit only once the request has actually completed, so the
    # Entered/Exited pair brackets the network call in the logs.
    print("Exited Webpage Extraction")
    return page_text
# Prefixes of the failure messages that validate_url() returns instead of page
# text. Matching on these avoids rejecting articles that merely contain the
# word "Error", and catches the messages that spell "error" in lowercase.
_FETCH_ERROR_PREFIXES = (
    "An error occurred while requesting",
    "Error response",
    "An unexpected error occurred",
)


def _extract_conversation(raw):
    """Find the first JSON object in *raw* that holds a conversation list.

    Every ``{`` in *raw* is tried as the start of a JSON document. The first
    one that decodes to a dict with a list under ``"conversation"`` wins.

    Returns:
        ``(json_text, lines)`` where ``json_text`` is the matching substring
        of *raw* and ``lines`` is the list of non-blank ``"text"`` strings of
        its turns, or ``(None, [])`` when no such object exists.
    """
    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            parsed, length = decoder.raw_decode(raw[start:])
        except json.JSONDecodeError:
            parsed, length = None, 0
        if isinstance(parsed, dict) and isinstance(parsed.get("conversation"), list):
            lines = [
                turn["text"]
                for turn in parsed["conversation"]
                if isinstance(turn, dict)
                and isinstance(turn.get("text"), str)
                and turn["text"].strip()
            ]
            return raw[start:start + length], lines
        start = raw.find("{", start + 1)
    return None, []


@spaces.GPU(duration=100)
def synthesize(article_url, progress_audio=gr.Progress()):
    """Turn the article at *article_url* into a two-voice podcast MP3.

    Steps: fetch the article text, ask the language model to rewrite it as a
    JSON conversation, extract that JSON from the generated text, synthesize
    each turn with MeloTTS (alternating between two English voices) and
    concatenate the turns into one MP3 file.

    Args:
        article_url: Address of the article; must start with ``http://`` or
            ``https://``.
        progress_audio: Gradio progress tracker; its ``tqdm`` is handed to the
            TTS engine as the progress bar.

    Returns:
        ``(conversation, audio_path)``. On success ``conversation`` is the
        JSON text produced by the model and ``audio_path`` is the path of the
        generated MP3. On failure ``conversation`` is an error message and
        ``audio_path`` is ``None``.
    """
    if not article_url.startswith(("http://", "https://")):
        return "URL must start with 'http://' or 'https://'", None

    article = fetch_text(article_url)
    if article.startswith(_FETCH_ERROR_PREFIXES):
        return article, None

    device = "cuda" if torch.cuda.is_available() else "cpu"
    chat = [
        {
            "role": "user",
            "content": article + """\n Convert the provided text into a short, informative podcast conversation between two experts. The tone should be professional and engaging. Please adhere to the following format and return only JSON:
{
"conversation": [
{"speaker": "", "text": ""},
{"speaker": "", "text": ""}
]
}
""",
        }
    ]
    prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tok([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )

    print("Entered Generation")
    # generate() runs in a worker thread and feeds the streamer, which this
    # thread drains until generation ends.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    generated = "".join(streamer)
    worker.join()
    conversation, lines = _extract_conversation(generated)
    print("Exited Generation")
    print(generated)
    print(conversation)

    if not lines:
        # Nothing speakable came back (no JSON, invalid JSON, or only empty
        # turns): report it instead of synthesizing empty audio or crashing.
        return "The model did not return a usable conversation. Please try again.", None

    speed = 1.0
    tts = TTS(language="EN", device=device)
    speaker_ids = tts.hps.data.spk2id
    voices = ["EN-Default", "EN-US"]
    combined_audio = AudioSegment.empty()
    for index, line in enumerate(lines):
        # Alternate the two voices turn by turn.
        voice_id = speaker_ids[voices[index % 2]]
        buffer = io.BytesIO()
        tts.tts_to_file(
            line, voice_id, buffer, speed=speed, pbar=progress_audio.tqdm, format="wav"
        )
        buffer.seek(0)
        combined_audio += AudioSegment.from_file(buffer, format="wav")

    # Use a unique file per request: the queue runs several requests
    # concurrently, and a shared fixed filename would let them overwrite each
    # other's audio. The file is left on disk because it is read after this
    # function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        final_audio_path = handle.name
    combined_audio.export(final_audio_path, format="mp3")
    return conversation, final_audio_path
# Page layout: heading and instructions, the URL input with its button, the
# two outputs side by side, then the credits.
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# Turn Any Article into a Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio podcasts.")
    gr.Markdown("### Instructions")
    gr.Markdown("""
- **Step 1:** Paste the URL of the article you want to convert into the textbox.
- **Step 2:** Click on "Podcastify" to generate the podcast.
- **Step 3:** Listen to the podcast or view the conversation.
""")
    gr.Markdown("""
- View the code at [GitHub - NarrateIt](https://github.com/EswarDivi/NarrateIt).
""")

    # Input: article URL plus the button that starts the conversion.
    with gr.Group():
        article_link_box = gr.Textbox(label="Article Link")
        podcastify_button = gr.Button("Podcastify", variant="primary")

    # Outputs: generated conversation text and the audio player.
    with gr.Row():
        conversation_box = gr.Textbox(label="Conversation", interactive=False)
        audio_player = gr.Audio(interactive=False)

    podcastify_button.click(
        synthesize,
        inputs=[article_link_box],
        outputs=[conversation_box, audio_player],
    )

    gr.Markdown("""
Special thanks to:
- [gstaff/sketch](https://huggingface.co/spaces/gstaff/sketch) for the Sketch Theme.
- [mrfakename/MeloTTS](https://huggingface.co/spaces/mrfakename/MeloTTS) and [GitHub](https://github.com/myshell-ai/MeloTTS) for MeloTTS.
- [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) for Function Calling Support.
- [Jina AI](https://jina.ai/reader/) for the web page parsing.
""")

# Enable the request queue (open API, up to 10 concurrent runs per event by
# default), then start the server.
queued_app = demo.queue(api_open=True, default_concurrency_limit=10)
queued_app.launch(show_api=True, share=True)
|