Spaces:
Running
Running
import gradio as gr | |
import io | |
import os | |
import uuid | |
import json | |
import random | |
import requests | |
import subprocess | |
from pypdf import PdfReader | |
from pypipertts import PyPiper | |
from urllib.parse import urlparse, parse_qs | |
from huggingface_hub import HfApi | |
import datetime | |
from utils import make_im | |
# ---------------------------------------------------------------------------
# Module-level setup: everything below runs once, at import time.
# ---------------------------------------------------------------------------

# Image helper object from the local ``utils`` module; save_data() calls
# ``mi.run_1(...)`` on it.
mi = make_im()

# Hugging Face access token (None when HF_TOKEN is not set) and the
# user/repo pair that save_data() uploads into.
token = os.getenv("HF_TOKEN")
user_name = "broadfield"
repo_name = "a"
# Formerly defined here (kept for reference, currently disabled):
# save_data=f'https://huggingface.co/datasets/{user_name}/{repo_name}/raw/main/'
api = HfApi(token=token)

# Text-to-speech engine; load_mod() is invoked with no arguments here
# (presumably loading a default voice - confirm against pypipertts).
pp = PyPiper()
pp.load_mod()
def read_pdf(pdf_url):
    """Download a PDF and return its extracted text as a single line.

    Args:
        pdf_url: URL of the PDF. A leading ``http:`` scheme is upgraded to
            ``https:``. An empty or ``None`` value yields ``""`` without
            performing any request.

    Returns:
        The concatenated text of every page with all newline characters
        removed, or ``""`` when no URL was supplied.

    Raises:
        RuntimeError: if the server does not answer with HTTP 200.
    """
    print(pdf_url)
    if not pdf_url:
        # Nothing to fetch (e.g. the page was opened without ?pdfurl=).
        return ""

    # Upgrade only the scheme; a blanket str.replace would also rewrite any
    # "http:" that appears later in the URL (for example in a query string).
    if pdf_url.startswith("http:"):
        pdf_url = "https:" + pdf_url[len("http:"):]

    # A timeout keeps an unresponsive host from blocking a worker forever.
    response = requests.get(pdf_url, timeout=60)
    if response.status_code != 200:
        print(response.status_code)
        # Fail with a clear message instead of letting PdfReader stumble
        # over a file that was never written.
        raise RuntimeError(
            f"Could not download PDF (HTTP {response.status_code}): {pdf_url}"
        )

    # Parse straight from memory: no temporary file is left behind on disk.
    reader = PdfReader(io.BytesIO(response.content))
    txt_out = "".join(page.extract_text() or "" for page in reader.pages)
    return txt_out.replace("\n", "")
def load_data(file):
    """Fetch ``<file>.json`` from the project's dataset repo and decode it.

    Args:
        file: Base name (without the ``.json`` extension) of the document.

    Returns:
        The decoded JSON value, or an empty list when the request fails or
        the response body is not valid JSON.
    """
    # The base URL used to be a module-level constant that is now commented
    # out; the name ``save_data`` refers to a function in this module, so
    # interpolating it produced an invalid URL and every call returned [].
    # NOTE(review): base URL taken from that commented-out constant -
    # confirm the dataset repo is still the intended source.
    base_url = f"https://huggingface.co/datasets/{user_name}/{repo_name}/raw/main/"
    try:
        r = requests.get(f"{base_url}{file}.json", timeout=30)
        lod = json.loads(r.text)
    except (requests.RequestException, ValueError):
        # Best effort: network errors and non-JSON bodies (json's decode
        # error is a ValueError subclass) both mean "no data".
        lod = []
    return lod
def save_data(t, m, l, n, w, p):
    """Render ``template.html`` for a PDF/voice setup and upload it to the Space.

    Args:
        t: PDF URL; also used to derive the page title / file name.
        m: Voice name.
        l: Length setting.
        n: Noise setting.
        w: Noise-width setting.
        p: Sentence-pause setting.

    Returns:
        An HTML snippet containing a link to the uploaded static page.

    Side effects:
        Calls ``mi.run_1`` and uploads two files (the rendered HTML page and
        a PNG image) to the ``user_name/repo_name`` Space via ``api``.
    """
    import re  # local import: only needed for the single-pass substitution

    rand = random.randint(1000, 9999)
    title = t.replace('https://', "").replace('/', "_").replace('.', "_")
    out_name = f"{title}_{rand}"
    repo_id = f'{user_name}/{repo_name}'

    # Called for its side effect; the return value is not used.
    # Presumably it writes images/<title>_prob.png - confirm in utils.make_im.
    mi.run_1(name=title, img_size=[315, 600])
    local_image = f"{os.getcwd()}/images/{title}_prob.png"
    image_url = f"https://huggingface.co/spaces/{user_name}/{repo_name}/resolve/main/images/{out_name}.png"

    with open('template.html', 'r', encoding='utf-8') as file:
        template = file.read()

    # The settings may arrive as numbers (slider values); str.replace would
    # raise TypeError on them, so every value is converted to text first.
    values = {
        'IMAGE': image_url,
        'TITLE': out_name,
        'PDFURL': str(t),
        'VOICE': str(m),
        'LENGTH': str(l),
        'NOISE': str(n),
        'WIDTH': str(w),
        'PAUSE': str(p),
    }
    # One pass over the template: a placeholder word that happens to occur
    # inside a substituted value (e.g. in the URL) is not rewritten again.
    placeholder = re.compile("|".join(re.escape(key) for key in values))
    file_out = placeholder.sub(lambda match: values[match.group(0)], template)

    buffer = io.BytesIO(file_out.encode())
    api.upload_file(
        path_or_fileobj=buffer,
        path_in_repo=f"{out_name}.html",
        repo_id=repo_id,
        token=token,
        repo_type="space",
    )
    api.upload_file(
        path_or_fileobj=local_image,
        # Repo-relative path (no leading slash), matching image_url above.
        path_in_repo=f"images/{out_name}.png",
        repo_id=repo_id,
        token=token,
        repo_type="space",
    )

    # Build the link once so the href and the visible text cannot drift apart.
    page_url = f"https://{user_name}-{repo_name}.static.hf.space/{out_name}.html"
    return f"<div><a href='{page_url}' target='_blank'>{page_url}</a></div>"
def load_html(url):
    """Return an ``<iframe>`` snippet showing ``url`` in the Google Docs viewer.

    Args:
        url: Address of the document to display.

    Returns:
        HTML for a full-width, 1200px-high iframe whose ``src`` points at
        ``https://docs.google.com/viewer`` with ``url`` as a percent-encoded
        query value.
    """
    from urllib.parse import quote  # local import keeps the block self-contained

    # Percent-encode the target so that its own '&', '?' or '#' characters do
    # not break the viewer's query string or the surrounding HTML attribute.
    encoded = quote(str(url), safe="")
    return (
        f'<iframe src="https://docs.google.com/viewer?url={encoded}&embedded=true" '
        'frameborder="0" height="1200px" width="100%"></iframe>'
    )
# Markup injected into the page <head> through gr.Blocks(head=...): social
# preview meta tags plus two JavaScript helpers. `run(url)` is the helper the
# on-load `js` snippet calls to embed the PDF in the element with id "cap".
head = """
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width" />
<meta name="twitter:card" content="player"/>
<meta name="twitter:site" content=""/>
<meta name="twitter:player" content="https://broadfield-fast-voice-full.hf.space/"/>
<meta name="twitter:player:stream" content="https://broadfield-fast-voice-full.hf.space/"/>
<meta name="twitter:player:width" content="100%"/>
<meta name="twitter:player:height" content="100%"/>
<meta property="og:title" content="PDF to Voice"/>
<meta property="og:description" content="Add PDF link to /?pdfurl= parameter"/>
<!---meta property="og:image" content=""/--->
<!---meta http-equiv="refresh" content="0; url=https://broadfield-fast-voice-full.hf.space/?pdfurl="--->
<script>
function changeMetaTag(name, content) {
    // Find the existing meta tag with the given name
    let metaTag = document.querySelector(`meta[name="${name}"]`);
    // If it doesn't exist, create a new meta tag
    if (!metaTag) {
        metaTag = document.createElement('meta');
        metaTag.name = name;
        document.head.appendChild(metaTag);
    }
    // Set the content of the meta tag
    metaTag.content = content;
}
const urlParams1 = new URLSearchParams(window.location.search);
var p_pdf = urlParams1.get('pdfurl')
console.log("params")
console.log(p_pdf)
/*changeMetaTag('twitter:player', 'https://broadfield-fast-voice-full.hf.space/?pdfurl=ZZZ'.replace('ZZZ',p_pdf))
changeMetaTag('twitter:player:stream', 'https://broadfield-fast-voice-full.hf.space/?pdfurl=ZZZ'.replace('ZZZ',p_pdf))*/
function run(url_in) {
    // Nothing to embed when no URL was supplied (String(null) would
    // otherwise load the viewer with the literal text "null").
    if (!url_in) {
        return;
    }
    let url = String(url_in)
    console.log(url);
    var framediv = document.getElementById('cap');
    // The container may not be rendered yet; bail out instead of throwing.
    if (!framediv) {
        return;
    }
    // Remove any previously embedded viewer
    let child = framediv.lastElementChild;
    while (child) {
        framediv.removeChild(child);
        child = framediv.lastElementChild;
    }
    const iframediv = document.createElement('iframe');
    // Encode the target so its own query string cannot break the viewer URL
    iframediv.src = 'https://docs.google.com/viewer?url=' + encodeURIComponent(url) + '&embedded=true';
    iframediv.width = '100%';
    iframediv.height = '1000px';
    // "frameborder" is an HTML attribute, not a DOM property
    iframediv.setAttribute('frameborder', '0');
    framediv.appendChild(iframediv);
}
</script>
"""
# JavaScript executed on page load (see app.load(..., js=js)). It reads the
# voice/PDF settings from the query string, embeds the PDF through the
# `run()` helper defined in `head`, and returns the six values that are
# written into the [t, m, l, n, w, p] components, in that order.
js = """
function () {
    const urlParams = new URLSearchParams(window.location.search);
    // Return the query parameter when present (and log it), else the fallback.
    const pick = (key, fallback) => {
        const value = urlParams.get(key);
        if (value) {
            console.log(value);
            return value;
        }
        return fallback;
    };
    // An absent pdfurl becomes "" rather than null so the textbox stays empty.
    var p_pdf = urlParams.get('pdfurl') || "";
    var p_mod = pick('mod', "en_US-joe-medium");
    var p_len = pick('len', "1");
    var p_nos = pick('nos', "0.5");
    var p_wid = pick('wid', "0.5");
    var p_pau = pick('pau', "1");
    // Only embed the viewer when there is actually a document to show.
    if (p_pdf) {
        run(p_pdf);
    }
    return [p_pdf, p_mod, p_len, p_nos, p_wid, p_pau];
}
"""
# Custom stylesheet handed to gr.Blocks(css=...).
# `.mes_div` styles the status <div> produced by upd_mes() / upd_mes2().
# NOTE(review): `#id` selects an element whose id is literally "id"; nothing
# in this file assigns that id - confirm the rule is still needed.
css="""
#id {
height:500px;
width:100%;
}
.mes_div {
height:30px;
font-weight:500;
}
"""
def upd_mes(t):
    """Return the status banner shown while a PDF is being fetched.

    Args:
        t: The PDF URL as typed by the user or taken from the ``pdfurl``
            query parameter.

    Returns:
        An HTML ``<div class='mes_div'>`` containing the URL as plain text.
    """
    # Imported locally and under another name because the module-level name
    # ``html`` is rebound to a Gradio component further down in this file.
    from html import escape

    # The URL is untrusted input and ends up inside rendered HTML, so it is
    # escaped to stop markup in it from being interpreted by the browser.
    return f"<div class='mes_div'>Reading PDF: {escape(str(t))}</div>"
def upd_mes2():
    """Return the status banner displayed after the PDF text has loaded."""
    message = "PDF Loaded, creating audio"
    return "<div class='mes_div'>" + message + "</div>"
# Gradio UI: layout, event wiring and launch.
# NOTE(review): the nesting of the `with` blocks below was reconstructed; the
# original indentation was not available - confirm which components belong
# inside the accordion and the row.
with gr.Blocks(head=head,css=css) as app:
    # Page title and a status line that upd_mes()/upd_mes2() write into.
    html=gr.HTML("""<div style='font-size:xx-large;font-weight:900'>PDF Reader to Voice</div>""")
    html2=gr.HTML()
    # Streaming audio output that starts playing automatically.
    a=gr.Audio(streaming=True,autoplay=True)
    with gr.Accordion("Voice Controls",open=False):
        with gr.Row():
            stp=gr.Button("Stop")
            upd=gr.Button("Update Voice")
        # Inputs; on page load they are filled from the values returned by `js`.
        t=gr.Textbox(label="PDF URL",interactive=True,visible=True)
        m=gr.Dropdown(label="Voice", choices=pp.key_list,value="en_US-lessac-high",interactive=True)
        l=gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1,interactive=True)
        n=gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5,interactive=True)
        w=gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5,interactive=True)
        p=gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1,interactive=True)
        save=gr.Button("Save Data")
        save_html=gr.HTML()
    # Container that the `run()` helper in `head` fills with the PDF viewer iframe.
    h=gr.HTML("""<div id='cap'></div>""")
    # Hidden holder for the extracted PDF text; a change to it starts synthesis.
    with gr.Column(visible=False):
        bulk=gr.Textbox(label="bulk",interactive=False,visible=True)
    # On load, run `js` in the browser and write its six return values into the inputs.
    app.load(None,None,[t,m,l,n,w,p],js=js)
    # URL changed: show "Reading PDF", then extract the text into `bulk`.
    tc=t.change(upd_mes,t,html2).then(read_pdf,t,[bulk])
    # Voice changed: call pp.load_mod with the selected voice.
    mc=m.change(pp.load_mod,m,None)
    # Text available: update the status line, then stream speech into the audio player.
    bc=bulk.change(upd_mes2,None,html2).then(pp.stream_tts,[bulk,m,l,n,w,p],a)
    # "Update Voice": re-run synthesis on the current text with the current settings.
    uc=upd.click(pp.stream_tts,[bulk,m,l,n,w,p],a)
    # "Save Data": render and upload the page, then show the resulting link.
    sb=save.click(save_data,[t,m,l,n,w,p],save_html)
    # "Stop": cancel the four events listed in `cancels`.
    sc=stp.click(None,None,None, cancels=[tc,mc,bc,uc])
# Enable the queue with a default concurrency limit of 20 and start the server.
app.queue(default_concurrency_limit=20).launch(max_threads=40)