Update app.py
app.py CHANGED
@@ -1,17 +1,17 @@
 import os, re
-from typing import List
-from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from typing import List
 import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
-#
-
+# ---------------- Model (CPU) ----------------
+# Humaneyes is Pegasus-based (seq2seq). Loads fine on CPU Spaces.
+MODEL_ID = os.getenv("MODEL_ID", "Eemansleepdeprived/Humaneyes")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID).to(device).eval()
 
-#
+# ---------------- Protect / restore ----------------
 SENTINEL_OPEN = "§§KEEP_OPEN§§"
 SENTINEL_CLOSE = "§§KEEP_CLOSE§§"
 URL_RE = re.compile(r'(https?://\S+)')
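The protect/restore helpers sit outside the changed hunks, so the diff never shows them. A minimal sketch of the sentinel idea, reconstructed from the constants above and the restore(draft, bag) call further down (hypothetical code, not necessarily the Space's actual implementation):

import re

SENTINEL_OPEN = "§§KEEP_OPEN§§"
SENTINEL_CLOSE = "§§KEEP_CLOSE§§"
URL_RE = re.compile(r'(https?://\S+)')

def protect(text: str):
    # Swap each URL for an indexed sentinel so generation can't rewrite it.
    bag = []
    def _stash(m):
        bag.append(m.group(1))
        return f"{SENTINEL_OPEN}{len(bag) - 1}{SENTINEL_CLOSE}"
    return URL_RE.sub(_stash, text), bag

def restore(text: str, bag):
    # Put every protected span back in place of its sentinel.
    for i, span in enumerate(bag):
        text = text.replace(f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}", span)
    return text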
@@ -57,7 +57,7 @@ def build_input(text: str, tone: str, region: str, level: str, intensity: int) -
 
 def chunk_text(s: str, max_chars: int = 1100):
     parts, buf, cur = [], [], 0
-    for block in re.split(r"(\n{2,})", s):
+    for block in re.split(r"(\n{2,})", s):  # keep paragraph gaps
         if cur + len(block) > max_chars and buf:
             parts.append("".join(buf)); buf, cur = [block], len(block)
         else:
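The new "# keep paragraph gaps" comment leans on a detail of re.split: because the pattern is parenthesized, the \n\n separators come back as list items instead of being discarded, so joining the buffered blocks reproduces the original paragraph breaks. A quick check:

import re

s = "para one\n\npara two\n\npara three"
blocks = re.split(r"(\n{2,})", s)
# ['para one', '\n\n', 'para two', '\n\n', 'para three']
assert "".join(blocks) == s  # separators survive, so chunks rejoin losslessly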
@@ -85,13 +85,14 @@ def humanize_core(text: str, tone: str, region: str, reading_level: str, intensi
     outs = [generate_one(ch, max_new=320) for ch in chunks]
     draft = "".join(outs).replace("—", "-").strip()
     final_text = restore(draft, bag)
+    # ensure protected spans survived (paranoid check)
     for i, span in enumerate(bag):
         marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
         if marker in protected_text and span not in final_text:
             final_text = final_text.replace(marker, span)
     return final_text
 
-#
+# ---------------- Gradio UI (also exposes REST at /api/predict/) ----------------
 def ui_humanize(text, tone, region, reading_level, intensity):
     return humanize_core(text, tone, region, reading_level, int(intensity))
 
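generate_one is also outside the hunks, so the diff leaves it unseen. A plausible shape consistent with the generate_one(ch, max_new=320) call above, reusing the tokenizer/model/device loaded earlier (assumed reconstruction; only the max_new name is attested by the diff):

import torch

def generate_one(chunk: str, max_new: int = 320) -> str:
    # Encode one chunk and run seq2seq generation on the Pegasus model.
    inputs = tokenizer(chunk, return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=max_new)
    return tokenizer.decode(out[0], skip_special_tokens=True)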
@@ -105,9 +106,6 @@ demo = gr.Interface(
         gr.Slider(1, 10, value=6, step=1, label="Humanization intensity"),
     ],
     outputs=gr.Textbox(label="Humanized"),
-    title="NoteCraft Humanizer (
-    description="Model:
+    title="NoteCraft Humanizer (Humaneyes)",
+    description="Model: Eemansleepdeprived/Humaneyes (Pegasus). REST: POST /api/predict/ with { data: [text,tone,region,level,intensity] }",
 ).queue()
-
-if __name__ == "__main__":
-    demo.launch()
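The new description string documents the REST route that queue()-wrapped Gradio apps expose, which is also why the explicit demo.launch() guard could be dropped (Spaces launches the demo object itself). A minimal client matching that payload shape; the URL is a placeholder and the tone/region/level values are illustrative only (recent Gradio versions may route this differently, e.g. via the gradio_client package):

import requests

url = "https://<user>-<space>.hf.space/api/predict/"  # placeholder Space URL
payload = {"data": ["Some draft text to humanize.", "casual", "US", "simple", 6]}

resp = requests.post(url, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["data"][0])  # the humanized text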