Kokoro-API

Running

File size: 8,404 Bytes

f1052d9
a67a3c8
95af88e
210ed13
b1328e8
210ed13
b4f9b4b
a58c3bb
 
ecc81cb
9f1f2bf
 
026afe1
77872ec
af97d45
bc115c5
 
 
9642724
 
166635a
568c974
66fc53c
 
359486d
66fc53c
210ed13
9f1f2bf
 
 
568c974
9f1f2bf
210ed13
9f1f2bf
 
 
 
 
 
 
 
 
 
 
 
c6d02b3
9f1f2bf
c6d02b3
9f1f2bf
1d16cc9
9f1f2bf
c6d02b3
 
2c7ffe4
 
a597e6b
 
 
 
 
 
 
fd34825
 
706151f
c6e402b
84291d5
7206ba2
a597e6b
48e1ac1
758f177
84291d5
 
f2fa35d
369a3fa
397731d
 
 
 
 
544df84
f2fa35d
eb977a1
83d3e5a
 
 
b4f9b4b
 
 
 
 
b263b2a
9642724
9f1f2bf
ebe916c
83d3d2d
ebe916c
72493a8
 
a588239
5afec29
967386d
ebe916c
 
34f0a8c
 
52c6881
452be41
b263b2a
42906cf
9f1f2bf
9642724
 
42906cf
9642724
42906cf
9642724
 
 
 
 
 
b263b2a
7f06f4f
07d7428
7f06f4f
 
359486d
9932afd
b4f9b4b
967386d
 
 
ba73260
 
9642724
ba73260
7c25f42
9642724
52c6881
9642724
ebe916c
 
9642724
 
 
9f1f2bf
 
9642724
 
 
 
 
210ed13
 
f86add6
a345db9
 
840cd7b
647941b
0ec3daa
840cd7b
a345db9
0ec3daa
a345db9
763a02d
34c1550
33f3309
 
 
210ed13
0ec3daa
 
33f3309
f285313
5afec29
210ed13
2daa864
 
cacb176
91c50b4
db40b0c
f7a31e7
2daa864
210ed13
 
aac4d05
32ecfac
1acb407
9932afd
0b4c2e7
 
 
aac4d05
7dd59c4
210ed13
 
9db21d2
210ed13
02471b0
 
aac4d05
9932afd
 
 
 
 
 
 
 
dd2b7f9
 
 
02471b0
b632387
f285313
01ee4f0
9f1f2bf
 
5afec29
560ff39
 
026afe1
560ff39
026afe1
019b2bc
026afe1
9932afd
026afe1
4a68766
9932afd
560ff39
 
 
9f1f2bf
2478f2d
560ff39
2478f2d
560ff39
9f1f2bf
 
 
 
026afe1
9932afd
210ed13
716f353

import os
import re
import spaces
import random
import string
import torch
import requests
import gradio as gr
import numpy as np
from lxml.html import fromstring
from transformers import pipeline
from torch.multiprocessing import Pool, Process, set_start_method
#from pathos.multiprocessing import ProcessPool as Pool
#from pathos.threading import ThreadPool as Pool
#from diffusers.pipelines.flux import FluxPipeline
#from diffusers.utils import export_to_gif
#from huggingface_hub import hf_hub_download
#from safetensors.torch import load_file
from diffusers import DiffusionPipeline, StableDiffusionXLImg2ImgPipeline
from diffusers.utils import load_image
#import jax
#import jax.numpy as jnp
import torch._dynamo

# Select the "spawn" start method for the torch.multiprocessing Pool that is
# used further down in this file; force=True replaces any start method that
# was already chosen.
set_start_method("spawn", force=True)
# Ask TorchDynamo to suppress its own errors instead of raising them
# (NOTE(review): presumably so a failed torch.compile does not abort the app
# -- confirm against the installed PyTorch version).
torch._dynamo.config.suppress_errors = True

#pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16, revision="refs/pr/1", token=os.getenv("hf_token")).to(device)
#pipe2 = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True).to(device)
#pipe2.unet = torch.compile(pipe2.unet, mode="reduce-overhead", fullgraph=True)

# Module-level cache slot from the original design. pipe_t2i() keeps its own
# cache below; PIPE stays defined for any other code that still reads it.
PIPE = None

# Cache used only by pipe_t2i(), so the text-to-image pipeline can never be
# mixed up with a pipeline that was built for a different task.
_T2I_PIPE = None

def pipe_t2i():
    """Return the text-to-image pipeline, building it on the first call.

    The pipeline is stored in a cache dedicated to this function. Sharing one
    cache slot between several loaders means whichever loader runs first
    decides what every other loader returns.

    Returns:
        The object returned by ``pipeline("text-to-image", ...)``.
    """
    global _T2I_PIPE
    if _T2I_PIPE is None:
        # NOTE(review): task name and keyword arguments are kept exactly as
        # they were -- confirm that the installed ``transformers`` version
        # accepts the "text-to-image" task for this model.
        _T2I_PIPE = pipeline("text-to-image", model="black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16, revision="refs/pr/1", tokenizer="black-forest-labs/FLUX.1-schnell", device=-1, token=os.getenv("hf_token"))
    return _T2I_PIPE
    
# Cache used only by pipe_i2i(), so the image-to-image pipeline can never be
# mixed up with a pipeline that was built for a different task.
_I2I_PIPE = None

def pipe_i2i():
    """Return the image-to-image (refiner) pipeline, building it on first use.

    The pipeline is stored in a cache dedicated to this function. Sharing one
    cache slot between several loaders means whichever loader runs first
    decides what every other loader returns.

    Returns:
        The object returned by ``pipeline("image-to-image", ...)`` after its
        ``unet`` attribute has been wrapped with ``torch.compile``.
    """
    global _I2I_PIPE
    if _I2I_PIPE is None:
        # NOTE(review): task name and keyword arguments are kept exactly as
        # they were -- confirm that the installed ``transformers`` version
        # builds this model and exposes a ``unet`` attribute.
        refiner = pipeline("image-to-image", model="stabilityai/stable-diffusion-xl-refiner-1.0", tokenizer="stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, device=-1, variant="fp16", use_safetensors=True)
        refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
        # Cache only once the compile step went through, so a failed first
        # attempt does not leave a half-prepared pipeline behind.
        _I2I_PIPE = refiner
    return _I2I_PIPE
    
# Matches one ASCII punctuation character. re.escape keeps characters such as
# "\", "]" and "^" literal inside the character class; without it the
# backslash only escapes the following "]" and is itself never matched.
_PUNCTUATION_RE = re.compile(f'[{re.escape(string.punctuation)}]')
# Matches a single whitespace character or a "+" sign.
_SEPARATOR_RE = re.compile(r'[\s+]')
# Longest cleaned phrase that is accepted for translation.
_MAX_PHRASE_LENGTH = 38
# Seconds to wait for the search request before giving up.
_REQUEST_TIMEOUT_SECONDS = 10
# Browser identities; one is picked at random for every request.
_USER_AGENTS = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
)


def _clean_phrase(value):
    """Return *value* lower-cased, without punctuation, single-spaced and trimmed."""
    spaced = _SEPARATOR_RE.sub(' ', value)
    bare = _PUNCTUATION_RE.sub('', spaced)
    # split()/join collapses the space runs left behind by removed characters.
    return ' '.join(bare.lower().split())


def translate(text,lang):
    """Translate a short phrase by scraping a Google search result page.

    Both arguments are cleaned first (lower-cased, punctuation removed,
    whitespace and "+" treated as separators). The cleaned phrase is sent to
    Google as a "Please translate ..." search and the translation widget of
    the result page is read back.

    Args:
        text: Phrase to translate; ``None`` is allowed.
        lang: Name of the target language (for example ``"english"``);
            ``None`` is allowed.

    Returns:
        The cleaned translation when the page reports ``lang`` as its target
        language. The cleaned input phrase when the request fails, the widget
        is missing or the target language differs. ``""`` when either argument
        is ``None`` or empty after cleaning.

    Raises:
        ValueError: If the cleaned phrase is longer than 38 characters.
    """
    if text is None or lang is None:
        return ""
    text = _clean_phrase(text)
    lang = _clean_phrase(lang)
    if not text or not lang:
        return ""
    if len(text) > _MAX_PHRASE_LENGTH:
        raise ValueError("Translation Error: Too long text!")

    # NOTE(review): the earlier hyphen "padding" step removed every hyphen it
    # had just inserted, so the phrase always reached the query unchanged.
    # That effective behaviour is kept here.
    query_text = f'Please translate {text}, into {lang}'

    translated = text
    try:
        # ``params`` lets requests URL-encode the query; the timeout keeps a
        # stalled connection from blocking the caller forever.
        resp = requests.get(
            url='https://www.google.com/search',
            params={'q': query_text},
            headers={'User-Agent': random.choice(_USER_AGENTS)},
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as err:
        print(f'Translation Warning: Failed To Translate! ({err})')
    else:
        html = fromstring(resp.content)
        try:
            trgt_lang = html.xpath('//*[@class="target-language"]')[0].text_content().lower().strip()
            trgt_text = html.xpath('//*[@id="tw-target-text"]/*')[0].text_content().lower().strip()
        except IndexError:
            # The translation widget is not part of the returned page.
            print('Translation Warning: Failed To Translate!')
        else:
            if trgt_lang == lang:
                translated = trgt_text

    ret = _clean_phrase(translated)
    print(ret)
    return ret

def generate_random_string(length):
    """Return ``length`` random characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

@spaces.GPU(duration=35)
def Piper(_do):
    """Run the text-to-image pipeline on the prompt ``_do``.

    The pipeline comes from ``pipe_t2i()`` and is asked for a 512x512 result
    using 4 inference steps.

    Returns:
        Whatever the pipeline call returns, or ``None`` when the call raised
        (the exception is printed).
    """
    generator = pipe_t2i()
    settings = dict(
        height=512,
        width=512,
        num_inference_steps=4,
        max_sequence_length=256,
        guidance_scale=0,
    )
    try:
        return generator(_do, **settings)
    except Exception as err:
        # Best effort: report the failure and let the caller see None.
        print(err)
    return None

@spaces.GPU(duration=35)
def Piper2(img,posi,neg):
    """Run the image-to-image pipeline on ``img``.

    Args:
        img: Input image handed to the pipeline as ``image``.
        posi: Text passed as ``prompt``.
        neg: Text passed as ``negative_prompt``.

    Returns:
        Whatever the pipeline call returns, or ``None`` when the call raised
        (the exception is printed).
    """
    pipe = pipe_i2i()
    try:
        # Call the pipeline fetched above. The previous code called an
        # undefined name, so the resulting NameError was swallowed below and
        # the function always returned None.
        retu = pipe(
            prompt=posi,
            negative_prompt=neg,
            image=img
        )
        return retu
    except Exception as e:
        # Best effort: report the failure and let the caller see None.
        print(e)
        return None

@spaces.GPU(duration=35)
def tok(txt):
    """Tokenize ``txt`` with the text-to-image pipeline's tokenizer.

    The token ids are printed and returned.

    Returns:
        The ``'input_ids'`` entry of the tokenizer output.
    """
    # Fetch the pipeline explicitly; there is no module-level ``pipe`` object.
    # NOTE(review): the text-to-image pipeline is assumed to be the intended
    # tokenizer source -- confirm.
    toks = pipe_t2i().tokenizer(txt)['input_ids']
    print(toks)
    return toks

def infer(p1,p2):
    """Generate one image for the given prompts and return its file name.

    A fixed list of style keywords, followed by ``p1`` when it is not empty,
    is joined into the prompt for ``Piper``. The result is saved as a randomly
    named PNG in the current directory. When ``p2`` is not empty the saved
    image is reloaded and passed through ``Piper2`` with
    ``"<p2> where in <p1>"`` as the negative prompt; that second result is
    saved under the same name prefixed with ``"_"``.

    Args:
        p1: Text describing what to include (may be ``""``).
        p2: Text describing what to exclude (may be ``""``).

    Returns:
        The name of the saved image file, or ``None`` when either generation
        step returned ``None``.
    """
    name = generate_random_string(12)+".png"
    _do = ['beautiful', 'playful', 'photographed', 'realistic', 'dynamic poze', 'deep field', 'reasonable coloring', 'rough texture', 'best quality', 'focused']
    if p1 != "":
        _do.append(f'{p1}')
    neg = f'{p2} where in {p1}' if p2 != "" else None

    output = Piper('A '+" ".join(_do))
    if output is None:
        return None
    output.images[0].save(name)
    if neg is None:
        # Nothing to exclude: the first image is the final result.
        return name

    img = load_image(name).convert("RGB")
    output2 = Piper2(img,p1,neg)
    if output2 is None:
        return None
    refined_name = "_"+name
    output2.images[0].save(refined_name)
    return refined_name

# Stylesheet handed to gr.Blocks(css=...). It centres input and heading text,
# forces left-to-right direction and the "Suez One" font, hides the footer and
# dropdown arrows, caps the main column at 15cm, keeps image containers
# square and stretches elements carrying the "btn" class to full width.
css="""
input, input::placeholder {
    text-align: center !important;
}
*, *::placeholder {
    direction: ltr !important;
    font-family: Suez One !important;
}
h1,h2,h3,h4,h5,h6,span,p,pre {
    width: 100% !important;
    text-align: center !important;
    display: block !important;
}
footer {
    display: none !important;
}
#col-container {
    margin: 0 auto !important;
    max-width: 15cm !important;
 }
.image-container {
    aspect-ratio: 448 / 448 !important;
}
.dropdown-arrow {
    display: none !important;
}
*:has(.btn), .btn {
    width: 100% !important;
    margin: 0 auto !important;
}
"""

# JavaScript handed to gr.Blocks(js=...). It sets maxlength="38" on the inputs
# inside div#prompt and div#prompt2, the same limit translate() enforces on
# the cleaned text.
js="""
function custom(){
    document.querySelector("div#prompt input").setAttribute("maxlength","38")
    document.querySelector("div#prompt2 input").setAttribute("maxlength","38")
}
"""

with gr.Blocks(theme=gr.themes.Soft(),css=css,js=js) as demo:
    # Page layout: a title, the INCLUDE and EXCLUDE text boxes, the START
    # button and a row of three read-only result images.
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
            # MULTI-LANGUAGE IMAGE GENERATOR
        """)
        with gr.Row():
            prompt = gr.Textbox(elem_id="prompt", placeholder="INCLUDE", container=False, max_lines=1)
        with gr.Row():
            prompt2 = gr.Textbox(elem_id="prompt2", placeholder="EXCLUDE", container=False, max_lines=1)
        with gr.Row():
            run_button = gr.Button("START", elem_classes="btn", scale=0)
        with gr.Row():
            # All result slots share the same configuration.
            image_options = dict(
                interactive=False,
                elem_classes="image-container",
                label="Result",
                show_label=False,
                type='filepath',
                show_share_button=False,
            )
            result = [gr.Image(**image_options) for _ in range(3)]

    def _ret(p):
        """Generate one image from the prompt dict ``p`` (keys "a" and "b").

        Prints a start and a finish marker around the call to ``infer`` and
        returns what ``infer`` returned.
        """
        print('Starting!')
        include_text, exclude_text = p["a"], p["b"]
        image_path = infer(include_text, exclude_text)
        print('Finished!')
        return image_path
        
    def _rets(p1,p2):
        """Click handler: translate both prompts and fill every result image.

        Both texts are translated to English, then one ``_ret`` job per entry
        in ``result`` is run in a process pool, all with the same prompts.

        Args:
            p1: Text from the INCLUDE box.
            p2: Text from the EXCLUDE box.

        Returns:
            A list with one ``_ret`` return value per result image, in order.
        """
        p1_en = translate(p1,"english")
        p2_en = translate(p2,"english")

        p = {"a":p1_en,"b":p2_en}

        # One job and one worker process per output image.
        jobs = [p] * len(result)
        # The context manager shuts the pool down once the results are in.
        # multiprocessing pools have no clear() method, so calling it raised
        # AttributeError after the images had already been generated.
        with Pool(processes=len(jobs)) as pool:
            return pool.map(_ret, jobs)

    run_button.click(fn=_rets,inputs=[prompt,prompt2],outputs=result)

# Start the server only when this file is run as the main program. With the
# "spawn" start method every worker process imports this module again;
# without the guard each worker would try to launch the server too.
if __name__ == "__main__":
    demo.queue().launch(server_port=6900)