Kokoro-API-1

Sleeping

App Files Files Community

Yaron Koresh committed on Jan 22

Commit

05bb0fb

verified ·

1 Parent(s): 4487e3e

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -122

app.py CHANGED Viewed

@@ -538,43 +538,30 @@ def upscaler(
     log(f'CALL upscaler')
-    global working
-    global _HEIGHT_
-    global _WIDTH_
-    if not working:
-        working = True
-        manual_seed(seed)
-        solver_type: type[Solver] = getattr(solvers, solver)
-        log(f'DBG upscaler 1')
-        enhanced_image = enhancer.upscale(
-            image=input_image,
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            upscale_factor=upscale_factor,
-            controlnet_scale=controlnet_scale,
-            controlnet_scale_decay=controlnet_decay,
-            condition_scale=condition_scale,
-            tile_size=(tile_height, tile_width),
-            denoise_strength=denoise_strength,
-            num_inference_steps=num_inference_steps,
-            loras_scale={"more_details": 0.5, "sdxl_render": 1.0},
-            solver_type=solver_type,
-        )
-        _HEIGHT_ = _HEIGHT_ * upscale_factor
-        _WIDTH_ = _WIDTH_ * upscale_factor
-        log(f'RET upscaler')
-        working = False
-        return enhanced_image
 def get_tensor_length(tensor):
     nums = list(tensor.size())
@@ -631,41 +618,33 @@ def generate_random_string(length):
 def add_song_cover_text(img,top_title=None,bottom_title=None):
-    global working
-    if not working:
-        working = True
-        w, h = img.size
-        draw = ImageDraw.Draw(img,mode="RGBA")
-        labels_distance = 1/3
-        if top_title:
-            rows = len(top_title.split("\n"))
-            textheight=min(math.ceil( w / 10 ), math.ceil( h / 5 ))
-            font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
-            textwidth = draw.textlength(top_title,font)
-            x = math.ceil((w - textwidth) / 2)
-            y = h - (textheight * rows / 2) - (h / 2)
-            y = math.ceil(y - (h / 2 * labels_distance))
-            draw.text((x, y), top_title, (255,255,255), font=font, spacing=2, stroke_width=math.ceil(textheight/20), stroke_fill=(0,0,0))
-        if bottom_title:
-            rows = len(bottom_title.split("\n"))
-            textheight=min(math.ceil( w / 10 ), math.ceil( h / 5 ))
-            font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
-            textwidth = draw.textlength(bottom_title,font)
-            x = math.ceil((w - textwidth) / 2)
-            y = h - (textheight * rows / 2) - (h / 2)
-            y = math.ceil(y + (h / 2 * labels_distance))
-            draw.text((x, y), bottom_title, (0,0,0), font=font, spacing=2, stroke_width=math.ceil(textheight/20), stroke_fill=(255,255,255))
-        working = False
-        return img
 google_translate_endpoint = "https://translate.google.com/m"
 language_codes = {
@@ -1284,67 +1263,54 @@ def translate(txt,to_lang="en",from_lang="auto"):
     log(f'RET translate with translation as {translation}')
     return translation.lower()
-@spaces.GPU(duration=150)
 def handle_generation(h,w,d):
     log(f'CALL handle_generate')
-    global working
-    global _HEIGHT_
-    global _WIDTH_
-    if not working:
-        working = True
-        if len(d) > 0:
-            d = re.sub(r",( ){1,}",". ",d)
-            d_lines = re.split(r"([\n]){1,}", d)
-            for line_index in range(len(d_lines)):
-                d_lines[line_index] = d_lines[line_index].strip()
-                if d_lines[line_index] != "" and re.sub(r'[\.]$', '', d_lines[line_index]) == d_lines[line_index]:
-                    d_lines[line_index] = d_lines[line_index] + "."
-            d = " ".join(d_lines)
-            d = re.sub(r"([ \t]){1,}", " ", d).lower().strip()
-            if len(d) > 400:
-                d = d if d == "" else summarize(translate(d))
-            else:
-                d = d if d == "" else translate(d)
-            d = re.sub(r"([ \t]){1,}", " ", d)
-            d = re.sub(r"(\. \.)", ".", d)
-            d = re.sub(r"[,]", ".", d).lower().strip()
-        neg = f"Textual, Text, Blurry, Distorted, Exceptional, Irregular, Unusual, Shiny, Smoothed, Polished, Low Quality, Worst Quality, Normal Quality, Anime Quality, Paint Quality, Movie Quality."
-        q = "\""
-        pos = f'Convincing Realism{ "." if d == "" else " from " + d }'
-        print(f"""
-            Positive: {pos}
-            Negative: {neg}
-        """)
-        img = image_pipe(
-            prompt=pos,
-            negative_prompt=neg,
-            height=h,
-            width=w,
-            output_type="pil",
-            guidance_scale=img_accu,
-            num_images_per_prompt=1,
-            num_inference_steps=image_steps,
-            max_sequence_length=seq,
-            generator=torch.Generator(device).manual_seed(random.randint(0, MAX_SEED))
-        ).images[0]
-        working = False
-        _HEIGHT_ = h
-        _WIDTH_ = w
-        return img
 # entry

     log(f'CALL upscaler')
+    manual_seed(seed)
+    solver_type: type[Solver] = getattr(solvers, solver)
+    log(f'DBG upscaler 1')
+    enhanced_image = enhancer.upscale(
+        image=input_image,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        upscale_factor=upscale_factor,
+        controlnet_scale=controlnet_scale,
+        controlnet_scale_decay=controlnet_decay,
+        condition_scale=condition_scale,
+        tile_size=(tile_height, tile_width),
+        denoise_strength=denoise_strength,
+        num_inference_steps=num_inference_steps,
+        loras_scale={"more_details": 0.5, "sdxl_render": 1.0},
+        solver_type=solver_type,
+    )
+    log(f'RET upscaler')
+    return enhanced_image
 def get_tensor_length(tensor):
     nums = list(tensor.size())
 def add_song_cover_text(img,top_title=None,bottom_title=None):
+    w, h = img.size
+    draw = ImageDraw.Draw(img,mode="RGBA")
+    labels_distance = 1/3
+    if top_title:
+        rows = len(top_title.split("\n"))
+        textheight=min(math.ceil( w / 10 ), math.ceil( h / 5 ))
+        font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
+        textwidth = draw.textlength(top_title,font)
+        x = math.ceil((w - textwidth) / 2)
+        y = h - (textheight * rows / 2) - (h / 2)
+        y = math.ceil(y - (h / 2 * labels_distance))
+        draw.text((x, y), top_title, (255,255,255), font=font, spacing=2, stroke_width=math.ceil(textheight/20), stroke_fill=(0,0,0))
+    if bottom_title:
+        rows = len(bottom_title.split("\n"))
+        textheight=min(math.ceil( w / 10 ), math.ceil( h / 5 ))
+        font = ImageFont.truetype(r"Alef-Bold.ttf", textheight)
+        textwidth = draw.textlength(bottom_title,font)
+        x = math.ceil((w - textwidth) / 2)
+        y = h - (textheight * rows / 2) - (h / 2)
+        y = math.ceil(y + (h / 2 * labels_distance))
+        draw.text((x, y), bottom_title, (0,0,0), font=font, spacing=2, stroke_width=math.ceil(textheight/20), stroke_fill=(255,255,255))
+    return img
 google_translate_endpoint = "https://translate.google.com/m"
 language_codes = {
     log(f'RET translate with translation as {translation}')
     return translation.lower()
+@spaces.GPU(duration=100)
 def handle_generation(h,w,d):
     log(f'CALL handle_generate')
+    if len(d) > 0:
+        d = re.sub(r",( ){1,}",". ",d)
+        d_lines = re.split(r"([\n]){1,}", d)
+        for line_index in range(len(d_lines)):
+            d_lines[line_index] = d_lines[line_index].strip()
+            if d_lines[line_index] != "" and re.sub(r'[\.]$', '', d_lines[line_index]) == d_lines[line_index]:
+                d_lines[line_index] = d_lines[line_index] + "."
+        d = " ".join(d_lines)
+        d = re.sub(r"([ \t]){1,}", " ", d).lower().strip()
+        if len(d) > 400:
+            d = d if d == "" else summarize(translate(d))
+        else:
+            d = d if d == "" else translate(d)
+        d = re.sub(r"([ \t]){1,}", " ", d)
+        d = re.sub(r"(\. \.)", ".", d)
+        d = re.sub(r"[,]", ".", d).lower().strip()
+    neg = f"Textual, Text, Blurry, Distorted, Exceptional, Irregular, Unusual, Shiny, Smoothed, Polished, Low Quality, Worst Quality, Normal Quality, Anime Quality, Paint Quality, Movie Quality."
+    q = "\""
+    pos = f'Convincing Realism{ "." if d == "" else " from " + d }'
+    print(f"""
+        Positive: {pos}
+        Negative: {neg}
+    """)
+    img = image_pipe(
+        prompt=pos,
+        negative_prompt=neg,
+        height=h,
+        width=w,
+        output_type="pil",
+        guidance_scale=img_accu,
+        num_images_per_prompt=1,
+        num_inference_steps=image_steps,
+        max_sequence_length=seq,
+        generator=torch.Generator(device).manual_seed(random.randint(0, MAX_SEED))
+    ).images[0]
+    return img
 # entry