Kokoro-API / app.py
Yaron Koresh
Update app.py
2c7ffe4 verified
raw
history blame
6.26 kB
import gradio as gr
import os
import re
#from tempfile import NamedTemporaryFile
import numpy as np
import spaces
import random
import string
from diffusers import AutoPipelineForText2Image
import torch
from pathos.multiprocessing import ProcessingPool as ProcessPoolExecutor
import requests
from lxml.html import fromstring
# Four-worker process pool, entered manually at import time; no matching
# __exit__ call is visible in this part of the file.
pool = ProcessPoolExecutor(4)
pool.__enter__()

# Other model ids, currently disabled:
#model_id = "runwayml/stable-diffusion-v1-5"
#model_id = "kandinsky-community/kandinsky-3"
model_id = "stabilityai/stable-diffusion-3-medium-diffusers"

cuda_ready = torch.cuda.is_available()
device = "cuda" if cuda_ready else "cpu"

# Arguments shared by the GPU and the CPU load path.
load_options = dict(use_safetensors=True, token=os.getenv('hf_token'))
if cuda_ready:
    # The returned value is not used.
    torch.cuda.max_memory_allocated(device=device)
    # The fp16 weights variant is requested only when CUDA is available.
    load_options.update(torch_dtype=torch.float16, variant="fp16")

# Load the text-to-image pipeline once and move it to the selected device.
pipe = AutoPipelineForText2Image.from_pretrained(model_id, **load_options).to(device)
def _clean_phrase(raw):
    """Return *raw* lower-cased, without punctuation, and trimmed at both ends.

    Each whitespace character and each ``+`` is first replaced by one space,
    then every character of ``string.punctuation`` is removed.
    """
    spaced = re.sub(r'[\s+]', ' ', raw)
    # re.escape stops "\" and "]" from being read as regex syntax inside the
    # character class, so backslashes are stripped like any other punctuation.
    return re.sub(f'[{re.escape(string.punctuation)}]', '', spaced).lower().strip()


def translate(text,lang):
    """Translate a short phrase by scraping a Google Search result page.

    The query ``"<lang> translate <text>"`` is sent to Google Search and the
    translation widget is read from the returned HTML.

    Args:
        text: Phrase to translate; ``None`` is accepted.
        lang: Name of the target language (e.g. ``"english"``); ``None`` is
            accepted.

    Returns:
        The cleaned translation, or the cleaned input text when the widget's
        target language does not match ``lang``. An empty string is returned
        when either argument is ``None`` or empty after clean-up.

    Raises:
        Exception: If the cleaned text is longer than 38 characters, or if
            the expected widget elements are missing from the page.
        Errors raised by ``requests.get`` (including timeouts) and by the
        HTML parser propagate unchanged.
    """
    if text is None or lang is None:
        return ""
    text = _clean_phrase(text)
    lang = _clean_phrase(lang)
    if not text or not lang:
        return ""
    if len(text) > 38:
        raise Exception("Translation Error: Too long text!")

    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
    ]

    # Without a timeout a stalled connection would block this call forever.
    request_timeout = 10
    resp = requests.get(
        url='https://www.google.com/search',
        params={'q': f'{lang} translate {text}'},
        headers={'User-Agent': random.choice(user_agents)},
        timeout=request_timeout,
    )
    html = fromstring(resp.content)
    #src = html.xpath('//pre[@data-placeholder="Enter text"]/textarea')[0].text.strip()

    try:
        trgt = html.xpath('//span[@class="target-language"]')[0].text.strip()
        rslt = html.xpath('//pre[@aria-label="Translated text"]/span')[0].text.strip()
    except (IndexError, AttributeError) as err:
        # IndexError: element not found; AttributeError: element has no text.
        raise Exception("Translation Error!") from err

    # Fall back to the input text when the widget translated into a
    # different language than the one requested.
    translated = rslt if trgt.lower() == lang.lower() else text
    ret = _clean_phrase(translated)
    print(ret)
    return ret
def generate_random_string(length):
    """Return ``length`` characters drawn at random from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    # One random.choice call per output character, in order.
    return ''.join(map(random.choice, [alphabet] * length))
@spaces.GPU(duration=120)
def Piper(_do,_dont):
    """Call the module-level ``pipe`` on a prompt pair and return its output.

    ``_do`` is passed as the prompt and ``_dont`` as the negative prompt.
    Image size, step count and guidance scale are fixed.
    """
    sampling = {
        "height": 512,
        "width": 1024,
        "negative_prompt": _dont,
        "num_inference_steps": 400,
        "guidance_scale": 10,
    }
    return pipe(_do, **sampling)
def infer(prompt,prompt2):
    """Generate an image from the two prompts and return the saved file name.

    Both inputs are passed through ``translate(..., "english")``. The
    translated required content is prefixed to a fixed style description, and
    the translated forbidden content is merged into a fixed negative prompt.
    Whether a prompt counts as empty is decided on the translated text, so
    input that cleans up to nothing (``None``, blanks, punctuation only) does
    not leave dangling ``", "`` or ``"where in "`` fragments in the prompts.

    Args:
        prompt: Required content, in any language; may be ``None`` or empty.
        prompt2: Forbidden content, in any language; may be ``None`` or empty.

    Returns:
        Name of the PNG file (12 random characters plus ``.png``) that the
        generated image was saved to.

    Raises:
        Whatever ``translate`` or ``Piper`` raise.
    """
    name = generate_random_string(12)+".png"
    prompt_en = translate(prompt,"english")
    prompt2_en = translate(prompt2,"english")

    # "poze" in the original prompt text was a misspelling of "pose".
    style = 'soft vivid colors, rough texture, dynamic pose, reasonable, realistic, photograph, soft lighting, deep field, highly detailed, bright background'
    unwanted = 'ugly, deformed, disfigured, poor details, bad anatomy'

    _do = f'{prompt_en}, {style}' if prompt_en else style

    if not prompt2_en:
        _dont = f'{unwanted}, logos, texts, labels'
    elif prompt_en:
        _dont = f'{unwanted}, {prompt2_en} where in {prompt_en}, {prompt2_en}, logos where in {prompt_en}, texts where in {prompt_en}, labels where in {prompt_en}'
    else:
        # No subject to attach the "where in ..." qualifiers to.
        _dont = f'{unwanted}, {prompt2_en}, logos, texts, labels'

    Piper(_do,_dont).images[0].save(name)
    return name
# Page styling: hides the footer and dropdown arrows, centres the main column
# (max 15cm wide) and gives the image container a 1024:512 aspect ratio.
css="""
footer {
display: none !important;
}
#col-container {
margin: 0 auto;
max-width: 15cm;
}
#image-container {
aspect-ratio: 1024 / 512;
}
.dropdown-arrow {
display: none !important;
}
"""

# Client-side script: sets maxlength=30 on the inputs of both prompt boxes.
js="""
function custom(){
document.querySelector("div#prompt input").setAttribute("maxlength","30");
document.querySelector("div#prompt2 input").setAttribute("maxlength","30");
}
"""

# Label shown in the page header.
power_device = "GPU" if torch.cuda.is_available() else "CPU"
# Build the page: a heading, two single-line right-to-left prompt boxes,
# a Run button and the image output.
with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
# Image Generator
Currently running on {power_device}.
""")
        # Settings shared by both prompt boxes.
        textbox_options = dict(container=False, rtl=True, max_lines=1)
        with gr.Row():
            prompt = gr.Textbox(
                elem_id="prompt",
                placeholder="(Required Content)",
                **textbox_options
            )
            prompt2 = gr.Textbox(
                elem_id="prompt2",
                placeholder="(Forbidden Content)",
                **textbox_options
            )
        with gr.Row():
            run_button = gr.Button("Run")
        result = gr.Image(
            elem_id="image-container",
            label="Result",
            show_label=False,
            type='filepath',
            show_share_button=False
        )

    # Submitting either textbox or clicking Run makes the same infer call.
    for trigger in (prompt.submit, prompt2.submit, run_button.click):
        trigger(fn=infer, inputs=[prompt, prompt2], outputs=[result])

demo.queue().launch()