Spaces:

ethzanalytics
/

gpt2-xl-conversational

Runtime error

App Files Files Community

Peter committed on May 25, 2022

Commit

0d868fb

1 Parent(s): 766eaec

add grammar correction

Browse files

Files changed (2) hide show

app.py +13 -1
grammar_improve.py +46 -1

app.py CHANGED Viewed

@@ -29,6 +29,7 @@ from grammar_improve import (
     remove_repeated_words,
     remove_trailing_punctuation,
     symspeller,
 )
 from utils import corr
@@ -77,7 +78,7 @@ def ask_gpt(
     chat_pipe,
     speaker="person alpha",
     responder="person beta",
-    max_len=196,
     top_p=0.95,
     top_k=50,
     temperature=0.6,
@@ -124,6 +125,7 @@ def ask_gpt(
         cln_resp = symspeller(rawtxt, sym_checker=schnellspell)
     elif not detect_propers(rawtxt):
         cln_resp = neuspell_correct(rawtxt, checker=ns_checker)
     else:
         # no correction needed
         cln_resp = rawtxt.strip()
@@ -152,6 +154,14 @@ def get_parser():
         default="ethzanalytics/ai-msgbot-gpt2-XL",  # default model
         help="the model to use for the chatbot on https://huggingface.co/models OR a path to a local model",
     )
     parser.add_argument(
         "--basic-sc",
         required=False,
@@ -174,6 +184,7 @@ if __name__ == "__main__":
     default_model = str(args.model)
     model_loc = Path(default_model)  # if the model is a path, use it
     basic_sc = args.basic_sc  # whether to use the baseline spellchecker
     device = 0 if torch.cuda.is_available() else -1
     print(f"CUDA avail is {torch.cuda.is_available()}")
@@ -190,6 +201,7 @@ if __name__ == "__main__":
     else:
         print("using Neuspell spell checker")
         ns_checker = load_ns_checker(fast=False)
     print(f"using model stored here: \n {model_loc} \n")
     iface = gr.Interface(

     remove_repeated_words,
     remove_trailing_punctuation,
     symspeller,
+    synthesize_grammar,
 )
 from utils import corr
     chat_pipe,
     speaker="person alpha",
     responder="person beta",
+    max_len=128,
     top_p=0.95,
     top_k=50,
     temperature=0.6,
         cln_resp = symspeller(rawtxt, sym_checker=schnellspell)
     elif not detect_propers(rawtxt):
         cln_resp = neuspell_correct(rawtxt, checker=ns_checker)
+        cln_resp = synthesize_grammar(corrector=grammarbot, message=cln_resp)
     else:
         # no correction needed
         cln_resp = rawtxt.strip()
         default="ethzanalytics/ai-msgbot-gpt2-XL",  # default model
         help="the model to use for the chatbot on https://huggingface.co/models OR a path to a local model",
     )
+    parser.add_argument(
+        "--gram-model",
+        required=False,
+        type=str,
+        default="pszemraj/t5-v1_1-base-ft-jflAUG",
+        help="text2text generation model ID from huggingface for the model to correct grammar",
+    )
     parser.add_argument(
         "--basic-sc",
         required=False,
     default_model = str(args.model)
     model_loc = Path(default_model)  # if the model is a path, use it
     basic_sc = args.basic_sc  # whether to use the baseline spellchecker
+    gram_model = str(args.gram_model)
     device = 0 if torch.cuda.is_available() else -1
     print(f"CUDA avail is {torch.cuda.is_available()}")
     else:
         print("using Neuspell spell checker")
         ns_checker = load_ns_checker(fast=False)
+        grammarbot = pipeline("text2text-generation", gram_model, device=device)
     print(f"using model stored here: \n {model_loc} \n")
     iface = gr.Interface(

grammar_improve.py CHANGED Viewed

@@ -14,7 +14,8 @@ import time
 import re
 import sys
 from symspellpy.symspellpy import SymSpell
 from utils import suppress_stdout
@@ -108,6 +109,50 @@ def fix_punct_spacing(text: str):
     return cln_text
 """
 start of SymSpell code

 import re
 import sys
 from symspellpy.symspellpy import SymSpell
+import transformers
+from transformers import pipeline
 from utils import suppress_stdout
     return cln_text
+def synthesize_grammar(
+    corrector: transformers.pipeline,
+    message: str,
+    num_beams=4,
+    length_penalty=0.9,
+    repetition_penalty=1.5,
+    no_repeat_ngram_size=4,
+    verbose=False,
+):
+    """
+    synthesize_grammar - use a SyntaxSynthesizer model to generate a string from a message
+    Parameters
+    ----------
+    corrector : transformers.pipeline, required, which is the SyntaxSynthesizer model already loaded
+    message : str, required, which is the message to be corrected
+    num_beams : int, optional, by default 4, which is the number of beams to use for the model
+    length_penalty : float, optional, by default 0.9, which is the length penalty to use for the model
+    repetition_penalty : float, optional, by default 1.5, which is the repetition penalty to use for the model
+    no_repeat_ngram_size : int, optional, by default 4, which is the n-gram size to use for the model
+    verbose : bool, optional, by default False, which is whether to print the runtime of the model
+    Returns
+    -------
+    """
+    st = time.perf_counter()
+    input_text = clean(message, lower=False)
+    results = corrector(
+        input_text,
+        max_length=int(1.1 * len(input_text)),
+        min_length=2 if len(input_text) < 64 else int(0.2 * len(input_text)),
+        num_beams=num_beams,
+        repetition_penalty=repetition_penalty,
+        length_penalty=length_penalty,
+        no_repeat_ngram_size=no_repeat_ngram_size,
+        early_stopping=True,
+        do_sample=False,
+        clean_up_tokenization_spaces=True,
+    )
+    corrected_text = results[0]["generated_text"]
+    if verbose:
+        rt = round(time.perf_counter() - st, 2)
+        print(f"synthesizing took {rt} seconds")
+    return corrected_text.strip()
 """
 start of SymSpell code