Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# espeak.py
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import time
|
5 |
+
import gradio as gr
|
6 |
+
import speech_recognition as sr
|
7 |
+
import pyttsx3
|
8 |
+
import threading
|
9 |
+
|
10 |
+
from typing import Tuple
|
11 |
+
|
12 |
+
# Try importing OpenAI; if not present app will use local model fallback
|
13 |
+
USE_OPENAI = bool(os.getenv("OPENAI_API_KEY", "").strip())
|
14 |
+
if USE_OPENAI:
|
15 |
+
import openai
|
16 |
+
|
17 |
+
# Local model fallback (T5-based)
|
18 |
+
try:
|
19 |
+
import torch
|
20 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
21 |
+
from happytransformer import HappyTextToText, TTSettings
|
22 |
+
LOCAL_MODEL_AVAILABLE = True
|
23 |
+
except Exception:
|
24 |
+
LOCAL_MODEL_AVAILABLE = False
|
25 |
+
|
26 |
+
# Optional: Levenshtein for better scoring
|
27 |
+
try:
|
28 |
+
import Levenshtein
|
29 |
+
_have_lev = True
|
30 |
+
except Exception:
|
31 |
+
_have_lev = False
|
32 |
+
|
33 |
+
# Application title; passed as ``title=`` to ``gr.Blocks`` in ``build_ui``.
APP_TITLE = "ESPeak — AI Grammar & Speech Assistant"
|
34 |
+
|
35 |
+
# ----------------------
|
36 |
+
# Utilities
|
37 |
+
# ----------------------
|
38 |
+
def levenshtein_distance(a: str, b: str) -> int:
    """Return the Levenshtein (edit) distance between ``a`` and ``b``.

    Delegates to the optional ``Levenshtein`` package when the module-level
    ``_have_lev`` flag is set; otherwise uses a pure-Python dynamic-programming
    fallback that keeps only one previous row in memory.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The minimum number of single-character insertions, deletions and
        substitutions needed to turn ``a`` into ``b``.
    """
    if _have_lev:
        return Levenshtein.distance(a, b)

    if not a:
        return len(b)
    if not b:
        return len(a)

    # The distance is symmetric, so keep the shorter string on the inner axis
    # to make the rolling row as small as possible.
    if len(b) > len(a):
        a, b = b, a

    previous = list(range(len(b) + 1))
    for i, ch_a in enumerate(a, start=1):
        current = [i]
        for j, ch_b in enumerate(b, start=1):
            cost = 0 if ch_a == ch_b else 1
            current.append(
                min(
                    previous[j] + 1,         # deletion
                    current[j - 1] + 1,      # insertion
                    previous[j - 1] + cost,  # substitution / match
                )
            )
        previous = current
    return previous[-1]
|
55 |
+
|
56 |
+
def score_from_edit(orig: str, corrected: str) -> int:
    """Score how close ``orig`` already was to ``corrected`` on a 0-100 scale.

    The score is ``100 * (1 - edit_distance / len(orig))``, rounded and
    clamped at 0, so fewer edits give a higher score.

    Args:
        orig: The text as entered or transcribed.
        corrected: The corrected text to compare against.

    Returns:
        An integer in ``[0, 100]``; ``0`` when ``orig`` is empty or
        whitespace-only.
    """
    if not orig.strip():
        return 0
    dist = levenshtein_distance(orig, corrected)
    # len(orig) >= 1 here because the whitespace-only case returned above.
    similarity = max(0.0, 1.0 - dist / len(orig))
    return int(round(similarity * 100))
|
66 |
+
|
67 |
+
# ----------------------
|
68 |
+
# Model loading
|
69 |
+
# ----------------------
|
70 |
+
# Local model handles; they stay ``None`` until ``load_local_models`` succeeds.
tokenizer = model = happy_tt = None


def load_local_models():
    """Load the local grammar-correction checkpoint into the module globals.

    Populates ``tokenizer`` and ``model`` (transformers seq2seq objects) and
    ``happy_tt`` (HappyTransformer text-to-text wrapper), all from the same
    checkpoint name.

    Raises:
        Exception: Whatever the underlying loaders raise. The error is printed
            and re-raised unchanged, and the globals are left as they were.
    """
    global tokenizer, model, happy_tt
    model_name = "prithivida/grammar_error_correcter_v1"
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        loaded_happy = HappyTextToText("T5", model_name)
    except Exception as e:
        print("Local model load failed:", e)
        raise
    # Publish only after every loader succeeded so the globals are never
    # left half-initialised.
    tokenizer, model, happy_tt = loaded_tokenizer, loaded_model, loaded_happy


# Load eagerly only when no OpenAI key is configured and the local
# dependencies imported successfully.
if not USE_OPENAI and LOCAL_MODEL_AVAILABLE:
    load_local_models()
|
83 |
+
|
84 |
+
# ----------------------
|
85 |
+
# Speech transcription helper (speech_recognition)
|
86 |
+
# ----------------------
|
87 |
+
def transcribe_audio_file(audio_filepath: str) -> str:
    """Transcribe an audio file to text using ``speech_recognition``.

    Args:
        audio_filepath: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text; ``""`` when the recognizer raises
        ``sr.UnknownValueError``; or a string starting with
        ``"[transcription_error]: "`` followed by the error message for any
        other failure. Callers detect failures by that prefix.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_filepath) as source:
            audio_data = recognizer.record(source)
        # The file is closed before the recognition call is made.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return ""
    except Exception as e:
        return f"[transcription_error]: {e}"
|
98 |
+
|
99 |
+
# ----------------------
|
100 |
+
# LLM connectors
|
101 |
+
# ----------------------
|
102 |
+
# System message: fixes the assistant's role and the JSON-only reply contract.
OPENAI_PROMPT_SYSTEM = (
    "You are ESPeak Assistant — expert grammar corrector. "
    "Return JSON only with keys: corrected_text (string), score (0-100 integer), explanation (short string)."
)

# User message template; ``{input_text}`` is filled in with the sentence to correct.
OPENAI_USER_TEMPLATE = (
    "Correct this sentence for grammar, punctuation, and clarity while preserving tone:\n\n"
    "### INPUT\n{input_text}\n\n"
    "Return only JSON with corrected_text, score, and explanation."
)
|
112 |
+
|
113 |
+
def _select_openai_model() -> str:
    """Return the chat model to use, preferring ``gpt-4o-mini`` when listed."""
    cached = getattr(_select_openai_model, "_cached", None)
    if cached is not None:
        return cached
    try:
        listing = openai.Model.list()
        available = {entry["id"] for entry in listing["data"]}
    except Exception:
        # Listing only refines the choice; do not cache a failed lookup so a
        # later request can try again.
        return "gpt-4"
    choice = "gpt-4o-mini" if "gpt-4o-mini" in available else "gpt-4"
    _select_openai_model._cached = choice
    return choice


def _coerce_score(raw, fallback: int) -> int:
    """Convert a model-supplied score to an int in [0, 100], else ``fallback``."""
    try:
        value = int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return fallback
    return max(0, min(100, value))


def _parse_correction_json(content: str, original: str):
    """Parse ``content`` into ``(corrected, score, explanation)`` or return None."""
    candidates = [content]
    # The model may wrap the JSON in prose or code fences, so also try the
    # outermost ``{...}`` substring.
    start, end = content.find("{"), content.rfind("}")
    if 0 <= start < end:
        candidates.append(content[start:end + 1])

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except ValueError:
            continue
        if not isinstance(data, dict):
            continue
        corrected = data.get("corrected_text", "")
        if not isinstance(corrected, str):
            corrected = "" if corrected is None else str(corrected)
        score = _coerce_score(data.get("score"), score_from_edit(original, corrected))
        explanation = data.get("explanation", "")
        if not isinstance(explanation, str):
            explanation = "" if explanation is None else str(explanation)
        return corrected, score, explanation
    return None


def call_openai_correct(text: str) -> Tuple[str, int, str]:
    """Correct ``text`` with the OpenAI chat API.

    Uses the ``openai.ChatCompletion`` / ``openai.Model`` interface of the
    pre-1.0 ``openai`` package.

    Args:
        text: The sentence to correct.

    Returns:
        ``(corrected_text, score, explanation)``. When the reply cannot be
        parsed as JSON, the raw reply is returned as the corrected text with
        an edit-distance score and a fixed explanation.

    Raises:
        Exception: Errors from the OpenAI call propagate to the caller.
    """
    messages = [
        {"role": "system", "content": OPENAI_PROMPT_SYSTEM},
        {"role": "user", "content": OPENAI_USER_TEMPLATE.format(input_text=text)},
    ]
    resp = openai.ChatCompletion.create(
        model=_select_openai_model(),
        messages=messages,
        temperature=0.0,
        max_tokens=300,
    )
    content = (resp["choices"][0]["message"]["content"] or "").strip()

    parsed = _parse_correction_json(content, text)
    if parsed is not None:
        return parsed

    # Last resort: treat the raw reply as the corrected sentence.
    corrected = content
    score = score_from_edit(text, corrected)
    explanation = "Auto-correction from OpenAI; parsing fallback used."
    return corrected, score, explanation
|
148 |
+
|
149 |
+
def call_local_correct(text: str) -> Tuple[str, int, str]:
    """Correct ``text`` with the locally loaded grammar model.

    Tries the HappyTransformer wrapper (``happy_tt``) first. If it fails or
    returns empty text, falls back to calling the transformers
    ``tokenizer``/``model`` pair directly. Both use the ``"gec: "`` prefix.

    Args:
        text: The sentence to correct.

    Returns:
        ``(corrected_text, score, explanation)`` where ``score`` comes from
        ``score_from_edit`` and ``explanation`` is one of two fixed messages.

    Raises:
        RuntimeError: If neither generation path produced a correction, for
            example because the local model was never loaded. Raising avoids
            reporting the untouched input as a perfect score.
    """
    prefix = "gec: " + text
    corrected = None

    # Preferred path: HappyTransformer wrapper.
    try:
        settings = TTSettings(num_beams=4, min_length=1)
        corrected = happy_tt.generate_text(prefix, args=settings).text or None
    except Exception:
        corrected = None

    # Fallback path: only run the second inference when the first gave nothing.
    if corrected is None:
        try:
            inputs = tokenizer.encode(
                prefix, return_tensors="pt", max_length=256, truncation=True
            )
            with torch.no_grad():
                outputs = model.generate(inputs, max_length=256, num_beams=4)
            corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as exc:
            raise RuntimeError(
                "local grammar model is unavailable or failed to generate a correction"
            ) from exc

    score = score_from_edit(text, corrected)
    if text.strip() == corrected.strip():
        explanation = "No change needed."
    else:
        explanation = "Adjusted grammar/punctuation; minor wording edits to improve clarity."
    return corrected, score, explanation
|
178 |
+
|
179 |
+
# ----------------------
|
180 |
+
# Main processing function
|
181 |
+
# ----------------------
|
182 |
+
def _resolve_source_text(audio, typed_text):
    """Return ``(source_text, note)``, preferring a usable transcription over typed text."""
    typed = typed_text or ""
    if not audio:
        return typed, "Typed input"
    transcribed = transcribe_audio_file(audio)
    if transcribed.startswith("[transcription_error]"):
        return typed, transcribed
    if not transcribed.strip():
        # Nothing intelligible was recognized; do not discard the typed text.
        return typed, "No speech recognized in the recording."
    return transcribed, f"Transcribed: {transcribed}"


def process_input(audio, typed_text, use_tts=False, prefer_openai=False):
    """Run one grammar check over recorded or typed input.

    Args:
        audio: Path to the recorded audio file, or ``None``/empty when nothing
            was recorded.
        typed_text: Text typed by the user; used when there is no audio or the
            audio could not be transcribed.
        use_tts: When true, speak the corrected text with ``pyttsx3`` on a
            background daemon thread.
        prefer_openai: When true and an OpenAI key is configured, use OpenAI.
            When false, use the local model if it is loaded, otherwise fall
            back to OpenAI if configured.

    Returns:
        A 5-tuple ``(corrected, score, explanation, info_message, meta_json)``
        where ``meta_json`` is a JSON string describing the run.
    """
    # 1) Determine the text to correct.
    source_text, trans_msg = _resolve_source_text(audio, typed_text)
    if not source_text.strip():
        return "No input detected.", 0, "No correction (empty input).", trans_msg, json.dumps({})

    # 2) Choose the backend. The local one is usable only if its model objects
    #    were actually loaded.
    local_ready = bool(
        LOCAL_MODEL_AVAILABLE
        and (happy_tt is not None or (tokenizer is not None and model is not None))
    )
    use_openai_backend = bool(USE_OPENAI and (prefer_openai or not local_ready))
    if use_openai_backend:
        backend = "openai"
    elif local_ready:
        backend = "local"
    else:
        backend = "none"

    # 3) Run the correction.
    try:
        if backend == "openai":
            corrected, score, explanation = call_openai_correct(source_text)
        elif backend == "local":
            corrected, score, explanation = call_local_correct(source_text)
        else:
            corrected = source_text
            score = 0
            explanation = (
                "No correction backend available: set OPENAI_API_KEY or "
                "install the local model dependencies."
            )
    except Exception as e:
        # Report the failure instead of crashing the UI callback.
        corrected = source_text
        score = 0
        explanation = f"Model error: {e}"

    # 4) Optionally speak the corrected text (pyttsx3).
    tts_msg = ""
    if use_tts:
        try:
            def speak(text):
                engine = pyttsx3.init()
                engine.say(text)
                engine.runAndWait()

            # Daemon thread so speech never blocks the response or shutdown.
            threading.Thread(target=speak, args=(corrected,), daemon=True).start()
            tts_msg = "Speaking corrected text..."
        except Exception as e:
            tts_msg = f"TTS failed: {e}"

    # 5) Build JSON metadata.
    meta = {
        "original": source_text,
        "corrected": corrected,
        "score": score,
        "explanation": explanation,
        "backend": backend,
        "transcription_note": trans_msg,
        "timestamp": int(time.time()),
    }

    info_message = trans_msg + (" • " + tts_msg if tts_msg else "")
    return corrected, score, explanation, info_message, json.dumps(meta, ensure_ascii=False, indent=2)
|
254 |
+
|
255 |
+
# ----------------------
|
256 |
+
# Gradio UI
|
257 |
+
# ----------------------
|
258 |
+
def build_ui():
    """Build and return the Gradio ``Blocks`` app for ESPeak.

    The layout has a header row, an input column (microphone recording, text
    box, TTS and backend checkboxes, submit button) and an output column
    (corrected text, score, explanation, info line, JSON metadata). The button
    is wired to ``process_input``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    css = """
    .header {background: linear-gradient(90deg,#ff8fa3,#ff6aa3); padding: 18px; border-radius: 12px; color:white}
    .muted {color: #6b7280}
    """
    with gr.Blocks(title=APP_TITLE, css=css) as demo:
        # Header
        with gr.Row(elem_id="top-row"):
            with gr.Column(scale=3):
                gr.Markdown(f"## <div class='header'>{APP_TITLE}</div>")
                gr.Markdown("Speak or type a sentence — ESPeak will correct grammar, score it, and explain changes. Use OpenAI backend if you set `OPENAI_API_KEY` in environment.")
            with gr.Column(scale=1):
                gr.Markdown("**Quick tips**\n- Speak clearly (short sentences work best)\n- Toggle TTS to hear the corrected sentence\n- Use `Prefer OpenAI` to route to ChatGPT if available")
        gr.Markdown("---")

        with gr.Row():
            # Inputs
            with gr.Column(scale=1):
                audio = gr.Audio(sources=["microphone"], type="filepath", label="Record (microphone)")
                typed = gr.Textbox(lines=3, placeholder="Or type your sentence here...", label="Text input")
                with gr.Row():
                    tts_checkbox = gr.Checkbox(label="Play corrected (TTS)", value=False)
                    prefer_openai = gr.Checkbox(label="Prefer OpenAI backend (if available)", value=True)
                run_btn = gr.Button("Check Grammar", variant="primary")
            # Outputs
            with gr.Column(scale=2):
                corrected_out = gr.Textbox(label="Corrected Text", interactive=False)
                score_out = gr.Number(label="Grammar Score (0-100)", interactive=False)
                explanation_out = gr.Textbox(label="Explanation (what I changed)", interactive=False)
                trans_note = gr.Textbox(label="Transcription / Info", interactive=False)
                meta_out = gr.Code(label="JSON metadata (copyable)", language="json")

        # The four inputs map positionally onto process_input's parameters and
        # its 5-tuple result maps onto the five output components.
        run_btn.click(
            process_input,
            inputs=[audio, typed, tts_checkbox, prefer_openai],
            outputs=[corrected_out, score_out, explanation_out, trans_note, meta_out],
        )

        gr.Markdown("---")
        gr.Markdown("**ESPeak** · Built for quick grammar checking of spoken and typed English. Designed for demos and interview projects.")
    return demo
|
296 |
+
|
297 |
+
if __name__ == "__main__":
    # Build the UI and serve it locally, opening a browser tab; no public share link.
    demo = build_ui()
    demo.launch(share=False, inbrowser=True)
|