gaur3009 committed on
Commit
f85af8e
·
verified ·
1 Parent(s): 36b167c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +299 -0
app.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # espeak.py
2
+ import os
3
+ import json
4
+ import time
5
+ import gradio as gr
6
+ import speech_recognition as sr
7
+ import pyttsx3
8
+ import threading
9
+
10
+ from typing import Tuple
11
+
12
# Enable the OpenAI backend only when an API key is configured AND the
# `openai` package can actually be imported; otherwise the app falls back
# to the local model.
USE_OPENAI = bool(os.getenv("OPENAI_API_KEY", "").strip())
if USE_OPENAI:
    try:
        import openai
    except ImportError:
        # A key without the package installed would otherwise crash the app
        # at import time; degrade to the local backend instead.
        print("OPENAI_API_KEY is set but the 'openai' package is not installed; "
              "falling back to the local model.")
        USE_OPENAI = False
16
+
17
+ # Local model fallback (T5-based)
18
+ try:
19
+ import torch
20
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
21
+ from happytransformer import HappyTextToText, TTSettings
22
+ LOCAL_MODEL_AVAILABLE = True
23
+ except Exception:
24
+ LOCAL_MODEL_AVAILABLE = False
25
+
26
+ # Optional: Levenshtein for better scoring
27
+ try:
28
+ import Levenshtein
29
+ _have_lev = True
30
+ except Exception:
31
+ _have_lev = False
32
+
33
+ APP_TITLE = "ESPeak — AI Grammar & Speech Assistant"
34
+
35
+ # ----------------------
36
+ # Utilities
37
+ # ----------------------
38
def levenshtein_distance(a: str, b: str) -> int:
    """Return the Levenshtein (edit) distance between ``a`` and ``b``.

    Delegates to the optional ``Levenshtein`` package when it was imported
    successfully; otherwise runs a pure-Python dynamic program, which is fine
    for sentence-length inputs.
    """
    if _have_lev:
        return Levenshtein.distance(a, b)

    # Trivial cases: the distance to an empty string is the other length.
    if not a:
        return len(b)
    if not b:
        return len(a)

    # Rolling-row DP: `previous[col]` is the distance between the prefix of
    # `a` handled so far and the first `col` characters of `b`.
    previous = list(range(len(b) + 1))
    for row, ch_a in enumerate(a, start=1):
        current = [row]
        for col, ch_b in enumerate(b, start=1):
            deletion = previous[col] + 1
            insertion = current[col - 1] + 1
            substitution = previous[col - 1] + (0 if ch_a == ch_b else 1)
            current.append(min(deletion, insertion, substitution))
        previous = current
    return previous[-1]
55
+
56
def score_from_edit(orig: str, corrected: str) -> int:
    """Turn the edit distance between two texts into a 0-100 score.

    Fewer edits relative to the length of ``orig`` give a higher score.
    A blank ``orig`` always scores 0.
    """
    if not orig.strip():
        return 0

    distance = levenshtein_distance(orig, corrected)
    denominator = max(len(orig), 1)
    similarity = 1.0 - distance / denominator
    # The distance can exceed len(orig) when `corrected` is much longer,
    # so clamp the similarity at zero.
    if similarity < 0.0:
        similarity = 0.0
    return int(round(similarity * 100))
66
+
67
+ # ----------------------
68
+ # Model loading
69
+ # ----------------------
70
# Handles for the local grammar-correction model; they stay None unless the
# local backend is loaded below.
tokenizer = None
model = None
happy_tt = None

# Local models are loaded eagerly at import time, and only when OpenAI is not
# configured and the local libraries imported successfully.
if LOCAL_MODEL_AVAILABLE and not USE_OPENAI:
    def load_local_models():
        """Populate the module-level tokenizer, model and HappyTransformer wrapper.

        Any loading error is printed and then re-raised to the caller.
        """
        global tokenizer, model, happy_tt
        checkpoint = "prithivida/grammar_error_correcter_v1"
        try:
            tokenizer = AutoTokenizer.from_pretrained(checkpoint)
            model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
            happy_tt = HappyTextToText("T5", checkpoint)
        except Exception as exc:
            print("Local model load failed:", exc)
            raise

    load_local_models()
83
+
84
+ # ----------------------
85
+ # Speech transcription helper (speech_recognition)
86
+ # ----------------------
87
def transcribe_audio_file(audio_filepath: str) -> str:
    """Transcribe an audio file to text using ``speech_recognition``.

    Returns the recognised text, an empty string when the recogniser raises
    ``UnknownValueError``, or a string starting with
    ``[transcription_error]`` describing any other failure.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_filepath) as audio_source:
            recording = recognizer.record(audio_source)
            transcript = recognizer.recognize_google(recording)
            return transcript
    except sr.UnknownValueError:
        return ""
    except Exception as exc:
        # Errors are reported in-band so the caller can show them in the UI.
        return "[transcription_error]: " + str(exc)
98
+
99
+ # ----------------------
100
+ # LLM connectors
101
+ # ----------------------
102
# System prompt: pins the assistant's role and the JSON-only response contract.
OPENAI_PROMPT_SYSTEM = " ".join([
    "You are ESPeak Assistant — expert grammar corrector.",
    "Return JSON only with keys: corrected_text (string), score (0-100 integer), explanation (short string).",
])

# User prompt template; `{input_text}` is filled in with the sentence to correct.
OPENAI_USER_TEMPLATE = "\n\n".join([
    "Correct this sentence for grammar, punctuation, and clarity while preserving tone:",
    "### INPUT\n{input_text}",
    "Return only JSON with corrected_text, score, and explanation.",
])
112
+
113
def _pick_openai_model() -> str:
    """Return "gpt-4o-mini" when the account lists it, otherwise "gpt-4"."""
    # The listing is a mapping with a "data" list of model entries; membership
    # has to be tested against the entries' ids, not the mapping's keys.
    listing = openai.Model.list()
    available = {entry.get("id") for entry in listing.get("data", [])}
    return "gpt-4o-mini" if "gpt-4o-mini" in available else "gpt-4"


def _parse_correction_json(content: str):
    """Return the JSON object contained in ``content`` as a dict, or None.

    Tries the whole string first, then the span from the first ``{`` to the
    last ``}`` (for replies that wrap the JSON in prose or code fences).
    """
    candidates = [content]
    start, end = content.find("{"), content.rfind("}")
    if 0 <= start < end:
        candidates.append(content[start:end + 1])
    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except ValueError:
            continue
        if isinstance(data, dict):
            return data
    return None


def _coerce_score(raw, fallback: int) -> int:
    """Convert a model-supplied score to an int in [0, 100], else ``fallback``."""
    try:
        value = int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return fallback
    return max(0, min(100, value))


def call_openai_correct(text: str) -> Tuple[str,int,str]:
    """Correct ``text`` with the OpenAI chat API.

    Returns a ``(corrected_text, score, explanation)`` tuple. The reply is
    expected to be a JSON object; when no usable object can be parsed, the raw
    reply is returned as the correction and the score is derived from the
    edit distance. Errors raised by the OpenAI client propagate to the caller.
    """
    messages = [
        {"role": "system", "content": OPENAI_PROMPT_SYSTEM},
        {"role": "user", "content": OPENAI_USER_TEMPLATE.format(input_text=text)},
    ]
    resp = openai.ChatCompletion.create(
        model=_pick_openai_model(),
        messages=messages,
        temperature=0.0,
        max_tokens=300,
    )
    content = resp["choices"][0]["message"]["content"].strip()

    data = _parse_correction_json(content)
    corrected = data.get("corrected_text", "") if data is not None else None
    if not isinstance(corrected, str):
        # Last resort: no parseable JSON (or no string correction in it).
        corrected = content
        score = score_from_edit(text, corrected)
        explanation = "Auto-correction from OpenAI; parsing fallback used."
        return corrected, score, explanation

    # A malformed score must not discard an otherwise valid correction:
    # fall back to the edit-distance score instead.
    score = _coerce_score(data.get("score"), score_from_edit(text, corrected))
    explanation = data.get("explanation", "")
    if not isinstance(explanation, str):
        explanation = str(explanation)
    return corrected, score, explanation
148
+
149
def call_local_correct(text: str) -> Tuple[str,int,str]:
    """Correct ``text`` with the local T5 grammar model.

    HappyTransformer's output is preferred. The plain ``transformers``
    generation runs only when HappyTransformer fails or returns nothing, so a
    normal request performs a single beam search instead of two.

    Returns a ``(corrected_text, score, explanation)`` tuple. When neither
    generator works, the input is returned unchanged with score 0 and an
    explanation saying so, rather than being reported as already correct.
    """
    prefix = "gec: " + text

    # Preferred path: HappyTransformer wrapper.
    try:
        settings = TTSettings(num_beams=4, min_length=1)
        corrected = happy_tt.generate_text(prefix, args=settings).text
    except Exception:
        corrected = ""

    # Fallback path: raw transformers generation.
    if not corrected:
        try:
            inputs = tokenizer.encode(prefix, return_tensors="pt", max_length=256, truncation=True)
            with torch.no_grad():
                outputs = model.generate(inputs, max_length=256, num_beams=4)
            corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as exc:
            print("Local correction failed:", exc)
            return text, 0, "Local model unavailable; text returned unchanged."

    score = score_from_edit(text, corrected)
    # Basic explanation: only whether anything changed (very short).
    if text.strip() == corrected.strip():
        explanation = "No change needed."
    else:
        explanation = "Adjusted grammar/punctuation; minor wording edits to improve clarity."
    return corrected, score, explanation
178
+
179
+ # ----------------------
180
+ # Main processing function
181
+ # ----------------------
182
def process_input(audio, typed_text, use_tts=False, prefer_openai=False):
    """Run one grammar check and return the values for the five UI outputs.

    audio: filepath from Gradio (or None)
    typed_text: str
    use_tts: bool -> read corrected text with local pyttsx3
    prefer_openai: route to OpenAI when a key is configured; when False the
        local model is used if it has been loaded

    Returns ``(corrected, score, explanation, info_message, metadata_json)``.
    """
    typed_text = typed_text or ""

    # 1) Pick the source text: transcription first, typed text as fallback.
    if audio:
        transcribed = transcribe_audio_file(audio)
        if transcribed.startswith("[transcription_error]"):
            source_text = typed_text
            trans_msg = transcribed
        elif transcribed.strip():
            source_text = transcribed
            trans_msg = f"Transcribed: {transcribed}"
        else:
            # Nothing recognised: do not discard text the user also typed.
            source_text = typed_text
            trans_msg = "No speech recognized"
            if typed_text.strip():
                trans_msg += "; using typed input"
    else:
        source_text = typed_text
        trans_msg = "Typed input"

    if not source_text.strip():
        return "No input detected.", 0, "No correction (empty input).", trans_msg, json.dumps({})

    # 2) Choose backend. OpenAI is used when configured, unless the user opts
    #    out AND a local model is actually loaded to take over.
    local_ready = LOCAL_MODEL_AVAILABLE and (happy_tt is not None or model is not None)
    use_openai_backend = bool(USE_OPENAI and (prefer_openai or not local_ready))

    try:
        if use_openai_backend:
            corrected, score, explanation = call_openai_correct(source_text)
        else:
            corrected, score, explanation = call_local_correct(source_text)
    except Exception as e:
        # On any backend failure, return the input unchanged with score 0.
        corrected = source_text
        score = 0
        explanation = f"Model error: {e}"

    # 3) Optionally speak corrected text (pyttsx3) without blocking the UI.
    tts_msg = ""
    if use_tts:
        def speak(text):
            # Errors here happen on the worker thread, so they must be
            # handled here rather than by the caller's try/except.
            try:
                engine = pyttsx3.init()
                engine.say(text)
                engine.runAndWait()
            except Exception as exc:
                print("TTS failed:", exc)

        try:
            threading.Thread(target=speak, args=(corrected,), daemon=True).start()
            tts_msg = "Speaking corrected text..."
        except Exception as e:
            tts_msg = f"TTS failed: {e}"

    # 4) Build JSON metadata
    meta = {
        "original": source_text,
        "corrected": corrected,
        "score": score,
        "explanation": explanation,
        "backend": "openai" if use_openai_backend else "local",
        "transcription_note": trans_msg,
        "timestamp": int(time.time()),
    }

    info = trans_msg + (" • " + tts_msg if tts_msg else "")
    return corrected, score, explanation, info, json.dumps(meta, ensure_ascii=False, indent=2)
254
+
255
+ # ----------------------
256
+ # Gradio UI
257
+ # ----------------------
258
def build_ui():
    """Assemble the Gradio Blocks interface and return it without launching."""
    stylesheet = (
        "\n"
        ".header {background: linear-gradient(90deg,#ff8fa3,#ff6aa3); padding: 18px; border-radius: 12px; color:white}\n"
        ".muted {color: #6b7280}\n"
    )

    with gr.Blocks(title=APP_TITLE, css=stylesheet) as demo:
        # Header: title and intro on the left, quick tips on the right.
        with gr.Row(elem_id="top-row"):
            with gr.Column(scale=3):
                gr.Markdown("## <div class='header'>ESPeak — AI Grammar & Speech Assistant</div>")
                gr.Markdown("Speak or type a sentence — ESPeak will correct grammar, score it, and explain changes. Use OpenAI backend if you set `OPENAI_API_KEY` in environment.")
            with gr.Column(scale=1):
                gr.Markdown("**Quick tips**\n- Speak clearly (short sentences work best)\n- Toggle TTS to hear the corrected sentence\n- Use `Prefer OpenAI` to route to ChatGPT if available")
        gr.Markdown("---")

        with gr.Row():
            # Left column: inputs and options.
            with gr.Column(scale=1):
                mic_input = gr.Audio(sources="microphone", type="filepath", label="Record (microphone)")
                text_input = gr.Textbox(lines=3, placeholder="Or type your sentence here...", label="Text input")
                with gr.Row():
                    tts_toggle = gr.Checkbox(label="Play corrected (TTS)", value=False)
                    openai_toggle = gr.Checkbox(label="Prefer OpenAI backend (if available)", value=True)
                submit_button = gr.Button("Check Grammar", variant="primary")
            # Right column: results.
            with gr.Column(scale=2):
                corrected_box = gr.Textbox(label="Corrected Text", interactive=False)
                score_box = gr.Number(label="Grammar Score (0-100)", interactive=False)
                explanation_box = gr.Textbox(label="Explanation (what I changed)", interactive=False)
                info_box = gr.Textbox(label="Transcription / Info", interactive=False)
                metadata_box = gr.Code(label="JSON metadata (copyable)", language="json")

        def on_submit(audio_file, typed_text, use_tts, use_openai):
            # Thin adapter from the UI inputs to the processing pipeline.
            return process_input(audio_file, typed_text, use_tts, use_openai)

        submit_button.click(
            on_submit,
            inputs=[mic_input, text_input, tts_toggle, openai_toggle],
            outputs=[corrected_box, score_box, explanation_box, info_box, metadata_box],
        )

        gr.Markdown("---")
        gr.Markdown("**ESPeak** · Built for quick grammar checking of spoken and typed English. Designed for demos and interview projects.")
    return demo
296
+
297
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it without a public
    # share link and open it in the browser.
    demo = build_ui()
    demo.launch(inbrowser=True, share=False)