Spaces:

BSC-LT
/

SalamandraTA-7B-Demo

Running on Zero

App Files Community

javi8979 committed on Mar 21

Commit

3e11881

verified ·

1 Parent(s): 43dc82a

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -159

app.py CHANGED Viewed

@@ -1,170 +1,78 @@
 import gradio as gr
-from datetime import datetime
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# ------------------------
-# 1) Load the Model
-# ------------------------
 model_id = "BSC-LT/salamandraTA-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
-    torch_dtype=torch.bfloat16
 )
-# Common function to generate text using transformers
-def call_model(prompt: str, max_tokens: int = 256):
-    message = [{"role": "user", "content": prompt}]
-    date_string = datetime.today().strftime('%Y-%m-%d')
-    chat_prompt = tokenizer.apply_chat_template(
-        message,
-        tokenize=False,
-        add_generation_prompt=True,
-        date_string=date_string
-    )
-    inputs = tokenizer.encode(chat_prompt, return_tensors="pt").to(model.device)
-    input_length = inputs.shape[1]
-    outputs = model.generate(
-        input_ids=inputs,
-        max_new_tokens=max_tokens,
-        do_sample=True,
-        num_beams=5,
-        early_stopping=True
-    )
-    return tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
-# ------------------------
-# 2) Task-specific functions
-# ------------------------
-def general_translation(source_lang, target_lang, text):
-    prompt = (
-        f"Translate the following text from {source_lang} into {target_lang}.\n"
-        f"{source_lang}: {text}\n"
-        f"{target_lang}:"
-    )
-    return call_model(prompt)
-def post_editing(source_lang, target_lang, source_text, machine_translation):
-    prompt = (
-        f"Please fix any mistakes in the following {source_lang}-{target_lang} machine translation or keep it unedited if it's correct.\n"
-        f"Source: {source_text}\n"
-        f"MT: {machine_translation}\n"
-        f"Corrected:"
-    )
-    return call_model(prompt, temperature=0.1)
-def document_level_translation(source_lang, target_lang, document_text):
-    prompt = (
-        f"Please translate this text from {source_lang} into {target_lang}.\n"
-        f"{source_lang}: {document_text}\n"
-        f"{target_lang}:"
-    )
-    return call_model(prompt)
-def named_entity_recognition(tokenized_text):
-    tokens = tokenized_text.strip().split()
-    prompt = (
-        "Analyse the following tokenized text and mark the tokens containing named entities.\n"
-        "Use the following annotation guidelines with these tags for named entities:\n"
-        "- ORG (Refers to named groups or organizations)\n"
-        "- PER (Refers to individual people or named groups of people)\n"
-        "- LOC (Refers to physical places or natural landmarks)\n"
-        "- MISC (Refers to entities that don't fit into standard categories).\n"
-        "Prepend B- to the first token of a given entity and I- to the remaining ones if they exist.\n"
-        "If a token is not a named entity, label it as O.\n"
-        f"Input: {tokens}\n"
-        "Marked:"
-    )
-    return call_model(prompt)
-def grammar_checker(source_lang, sentence):
-    prompt = (
-        f"Please fix any mistakes in the following {source_lang} sentence or keep it unedited if it's correct.\n"
-        f"Sentence: {sentence}\n"
-        f"Corrected:"
-    )
-    return call_model(prompt)
-# ------------------------
-# 3) Gradio UI
-# ------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## SalamandraTA-7B-Instruct Demo")
-    gr.Markdown(
-        "This Gradio app demonstrates various use-cases for the **SalamandraTA-7B-Instruct** model, including:\n"
-        "1. General Translation\n"
-        "2. Post-editing\n"
-        "3. Document-level Translation\n"
-        "4. Named-Entity Recognition (NER)\n"
-        "5. Grammar Checking"
-    )
-    with gr.Tab("1. General Translation"):
-        gr.Markdown("### General Translation")
-        src_lang_gt = gr.Textbox(label="Source Language", value="Spanish")
-        tgt_lang_gt = gr.Textbox(label="Target Language", value="English")
-        text_gt = gr.Textbox(label="Text to Translate", lines=4, value="Ayer se fue, tomó sus cosas y se puso a navegar.")
-        translate_button = gr.Button("Translate")
-        output_gt = gr.Textbox(label="Translation Output", lines=4)
-        translate_button.click(fn=general_translation,
-                               inputs=[src_lang_gt, tgt_lang_gt, text_gt],
-                               outputs=output_gt)
-    with gr.Tab("2. Post-editing"):
-        gr.Markdown("### Post-editing (Source → Target)")
-        src_lang_pe = gr.Textbox(label="Source Language", value="Catalan")
-        tgt_lang_pe = gr.Textbox(label="Target Language", value="English")
-        source_text_pe = gr.Textbox(label="Source Text", lines=2, value="Rafael Nadal i Maria Magdalena van inspirar a una generació sencera.")
-        mt_text_pe = gr.Textbox(label="Machine Translation", lines=2, value="Rafael Christmas and Maria the Muffin inspired an entire generation each in their own way.")
-        post_edit_button = gr.Button("Post-edit")
-        output_pe = gr.Textbox(label="Post-edited Text", lines=4)
-        post_edit_button.click(fn=post_editing,
-                               inputs=[src_lang_pe, tgt_lang_pe, source_text_pe, mt_text_pe],
-                               outputs=output_pe)
-    with gr.Tab("3. Document-level Translation"):
-        gr.Markdown("### Document-level Translation")
-        src_lang_doc = gr.Textbox(label="Source Language", value="English")
-        tgt_lang_doc = gr.Textbox(label="Target Language", value="Asturian")
-        doc_text = gr.Textbox(label="Document Text (multiple paragraphs allowed)",
-                              lines=8,
-                              value=("President Donald Trump, who campaigned on promises to crack down on illegal immigration, "
-                                     "has raised alarms in the U.S. dairy industry with his threat to impose 25% tariffs on Mexico "
-                                     "and Canada by February 2025."))
-        doc_button = gr.Button("Translate Document")
-        doc_output = gr.Textbox(label="Document-level Translation Output", lines=8)
-        doc_button.click(fn=document_level_translation,
-                         inputs=[src_lang_doc, tgt_lang_doc, doc_text],
-                         outputs=doc_output)
-    with gr.Tab("4. Named-Entity Recognition"):
-        gr.Markdown("### Named-Entity Recognition (NER)")
-        text_ner = gr.Textbox(
-            label="Tokenized Text (space-separated tokens)",
-            lines=2,
-            value="La defensa del antiguo responsable de la RFEF confirma que interpondrá un recurso."
         )
-        ner_button = gr.Button("Run NER")
-        ner_output = gr.Textbox(label="NER Output", lines=6)
-        ner_button.click(fn=named_entity_recognition,
-                         inputs=[text_ner],
-                         outputs=ner_output)
-    with gr.Tab("5. Grammar Checker"):
-        gr.Markdown("### Grammar Checker")
-        src_lang_gc = gr.Textbox(label="Source Language", value="Catalan")
-        text_gc = gr.Textbox(label="Sentence to Check",
-                             lines=2,
-                             value="Entonses, el meu jefe m’ha dit que he de treballar els fins de setmana.")
-        gc_button = gr.Button("Check Grammar")
-        gc_output = gr.Textbox(label="Corrected Sentence", lines=2)
-        gc_button.click(fn=grammar_checker,
-                        inputs=[src_lang_gc, text_gc],
-                        outputs=gc_output)
-demo.launch()

 import gradio as gr
+import spaces
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from datetime import datetime
 model_id = "BSC-LT/salamandraTA-7b-instruct"
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
+    torch_dtype=torch.bfloat16  # Use bf16 as in the original example
 )
+languages = [ "Spanish", "Catalan", "English", "French", "German", "Italian", "Portuguese", "Euskera", "Galician",
+             "Bulgarian", "Czech", "Lithuanian", "Croatian", "Dutch", "Romanian", "Danish", "Greek", "Finnish",
+             "Hungarian", "Slovak", "Slovenian", "Estonian", "Polish", "Latvian", "Swedish", "Maltese",
+             "Irish", "Aranese", "Aragonese", "Asturian" ]
+example_sentence = ["Ahir se'n va anar, va agafar les seves coses i es va posar a navegar."]
+@spaces.GPU(duration=120)
+def translate(input_text, source, target):
+    sentences = [s for s in input_text.strip().split('\n') if s.strip()]
+    translated_sentences = []
+    for sentence in sentences:
+        prompt_text = f"Translate the following text from {source} into {target}.\n{source}: {sentence} \n{target}:"
+        messages = [{"role": "user", "content": prompt_text}]
+        date_string = datetime.today().strftime('%Y-%m-%d')
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            date_string=date_string
+        )
+        inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
+        input_length = inputs.input_ids.shape[1]
+        output = model.generate(
+            input_ids=inputs.input_ids,
+            max_new_tokens=400,
+            early_stopping=True,
+            num_beams=5
         )
+        decoded = tokenizer.decode(output[0, input_length:], skip_special_tokens=True).strip()
+        translated_sentences.append(decoded)
+    return '\n'.join(translated_sentences), ""
+with gr.Blocks() as demo:
+    gr.HTML("""<html>
+  <head><style>h1 { text-align: center; }</style></head>
+  <body><h1>SalamandraTA 7B Translate</h1></body>
+</html>""")
+    with gr.Row():
+        with gr.Column():
+            source_language_dropdown = gr.Dropdown(choices=languages, value="Catalan", label="Source Language")
+            input_textbox = gr.Textbox(lines=5, placeholder="Enter text to translate", label="Input Text")
+        with gr.Column():
+            target_language_dropdown = gr.Dropdown(choices=languages, value="English", label="Target Language")
+            translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
+    info_label = gr.HTML("")
+    btn = gr.Button("Translate")
+    btn.click(translate, inputs=[input_textbox, source_language_dropdown, target_language_dropdown],
+              outputs=[translated_textbox, info_label])
+    gr.Examples(example_sentence, inputs=[input_textbox])
+if __name__ == "__main__":
+    demo.launch()