KuangDW committed 946f7f8 (parent: dd05f29): specify local llm

Files changed:
- app.py (+30 -17)
- vecalign/plan2align.py (+31 -134)
app.py
CHANGED

@@ -9,14 +9,14 @@ from openai import OpenAI
 from vecalign.plan2align import translate_text, external_find_best_translation
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from trl import AutoModelForCausalLMWithValueHead
-from huggingface_hub import login
+from huggingface_hub import login, HfApi, snapshot_download
 import spacy
 import subprocess
 import pkg_resources
 import sys

 laser_token = os.environ.get("align_enc")
-laser_path = snapshot_download(repo_id="KuangDW/laser", use_auth_token=
+laser_path = snapshot_download(repo_id="KuangDW/laser", use_auth_token=laser_token)
 os.environ["LASER"] = laser_path

 def check_and_install(package, required_version):
@@ -54,21 +54,35 @@ except OSError:
     download("zh_core_web_sm")
 subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy==1.24.0", "--force-reinstall"])

-# ----------
-
-
-
+# ---------- translation function ----------
+
+# Initialize device
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+# Load models once
+print("Loading models...")
+model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.float16
 )

 def generate_translation(system_prompt, prompt):
-
-
-
-
-
-
-
-
+    messages=[
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt}
+    ]
+    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+    outputs = model.generate(
+        inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+    translation = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
     return translation

 def check_token_length(text, max_tokens=1024):
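Note: for reference, here is the same local-generation pattern as a self-contained script. The checkpoint name and sampling settings are taken from the hunk above; add_generation_prompt=True is an assumption added here, since Llama-style chat templates generally need it so the model opens a fresh assistant turn (the committed code omits it).

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"  # gated; requires access approval
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", torch_dtype=torch.float16
    )

    def generate_translation(system_prompt, prompt):
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        # Appends the assistant header so generation starts a new reply
        # (assumption: the committed code calls apply_chat_template without this flag).
        inputs = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        outputs = model.generate(
            inputs, max_new_tokens=512, temperature=0.7, top_p=0.9, do_sample=True
        )
        # Decode only the newly generated tokens, not the echoed prompt.
        return tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

    print(generate_translation(
        "You are a helpful translator and only output the result.",
        "Translate into English: 今日はいい天気ですね。",
    ))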
@@ -188,7 +202,7 @@ def mpc_translation(text, src_language, target_language, iterations, session_id):
             best_score = score
     return current_trans, best_score

-# ---------- Gradio
+# ---------- Gradio function ----------

 def process_text(text, src_language, target_language, max_iterations_value, threshold_value,
                  good_ref_contexts_num_value, translation_methods, state):
@@ -202,7 +216,6 @@ def process_text(text, src_language, target_language, max_iterations_value, threshold_value,
     4. MPC 翻譯
     """

-    # 初始化各輸出內容
     orig_output = ""
     plan2align_output = ""
     best_of_n_output = ""
@@ -214,7 +227,7 @@ def process_text(text, src_language, target_language, max_iterations_value, threshold_value,
         orig_output = f"{orig}\n\nScore: {best_score:.2f}"
     if "Plan2Align" in translation_methods:
         plan2align_trans, best_score = plan2align_translate_text(
-            text, session_id, src_language, target_language,
+            text, session_id, model, tokenizer, device, src_language, target_language,
            max_iterations_value, threshold_value, good_ref_contexts_num_value, "metricx"
         )
         plan2align_output = f"{plan2align_trans}\n\nScore: {best_score:.2f}"
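Both changed call sites in app.py now thread the globally loaded model, tokenizer, and device into Plan2Align rather than letting the library construct its own API client. plan2align_translate_text itself is not part of this diff, so the bridging wrapper below is a hypothetical sketch whose argument order simply mirrors the changed call site; only the keywords visible in translate_text's (truncated) signature are forwarded.

    from vecalign.plan2align import translate_text

    # Hypothetical wrapper (not in this commit): bridges the Gradio handler
    # to the refactored translate_text entry point.
    def plan2align_translate_text(text, session_id, model, tokenizer, device,
                                  src_language, target_language,
                                  max_iterations, threshold,
                                  good_ref_contexts_num, reward_model):
        return translate_text(
            text, session_id, model, tokenizer, device,
            src_language=src_language,
            task_language=target_language,
            max_iterations_value=max_iterations,
            # threshold / good_ref_contexts_num / reward_model keywords are
            # truncated in the diff, so they are not guessed at here.
        )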
vecalign/plan2align.py
CHANGED

@@ -28,12 +28,6 @@ lang_map = {
     "Chinese": ("zh", "zh_core_web_sm")
 }

-openai = OpenAI(
-    api_key="",
-    base_url="https://api.deepinfra.com/v1/openai",
-)
-MODEL_NAME= "google/gemma-2-9b-it" # "meta-llama/Meta-Llama-3.1-8B-Instruct"
-
 ################################# folder / file processing #################################

 def clear_folder(folder_path):
@@ -180,7 +174,7 @@ def external_find_best_translation(evals, language, session_id):

 ################################# generating translation #################################

-def translate_with_deepinfra(source_sentence, buffer, good_sent_size, src_language, tgt_language):
+def translate_with_deepinfra(model, tokenizer, device, source_sentence, buffer, good_sent_size, src_language, tgt_language):
     system_prompts = [
         "You are a meticulous translator. Provide a literal, word-for-word translation that preserves the structure and meaning of each individual word.",
         "You are a professional translator. Deliver a clear, formal, and precise translation that faithfully conveys the original meaning.",
@@ -227,14 +221,19 @@ def translate_with_deepinfra(source_sentence, buffer, good_sent_size, src_language, tgt_language):

     translations = []
     for prompt in system_prompts:
-
-
-
-
-
-
+        messages=[
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": context_prompt}
+        ]
+        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=512,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True
         )
-        translation =
+        translation = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

         print("--------------------------------------------------------------------------------")
         print("\n rollout translation: \n")
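In isolation, the rollout draws one sampled candidate per persona-style system prompt from the shared model. A minimal sketch of that pattern (generate_candidates is an illustrative name; in the real code, context_prompt is built from the source sentence plus buffered reference translations):

    def generate_candidates(model, tokenizer, device, system_prompts, context_prompt):
        """Sample one candidate translation per system prompt."""
        candidates = []
        for system_prompt in system_prompts:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": context_prompt},
            ]
            inputs = tokenizer.apply_chat_template(
                messages, add_generation_prompt=True, return_tensors="pt"
            ).to(device)
            # Sampling with temperature/top_p yields diverse rollouts per persona.
            outputs = model.generate(
                inputs, max_new_tokens=512, temperature=0.7, top_p=0.9, do_sample=True
            )
            candidates.append(
                tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
            )
        return candidates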
@@ -264,7 +263,7 @@ def process_buffer_sentences(source_sentences, buffer):
             translations.append(translation_map[src_sent][0])
     return translations

-def final_translate_with_deepinfra(source_sentence, source_segments, buffer, src_language, tgt_language):
+def final_translate_with_deepinfra(model, tokenizer, device, source_sentence, source_segments, buffer, src_language, tgt_language):
     translations = process_buffer_sentences(source_segments, buffer)
     initial_translation = "\n".join(translations)

@@ -286,21 +285,23 @@ def final_translate_with_deepinfra(source_sentence, source_segments, buffer, src_language, tgt_language):

     print("rewrite prompt:")
     print(rewrite_prompt)
-
-
-
-
-
-
-
-
-
+    messages=[
+        {"role": "system", "content": "You are a helpful translator and only output the result."},
+        {"role": "user", "content": rewrite_prompt}
+    ]
+    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+    outputs = model.generate(
+        inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+    translation = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
     return translation


 ################################# alignment functions #################################
-
-
 def save_sentences_to_txt(sentences, filename):
     i = 0
     with open(filename, "w", encoding="utf-8") as file:
@@ -558,111 +559,13 @@ def generate_windows(source, translations):

 ################################# main function #################################

-def saving_memory(buffer, index, iteration, final_translations_record):
-    """
-    Save the buffer, and final_translations_record to the Memory folder.
-    """
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    memory_folder = os.path.join(current_dir, f"{MEMORY_FOLDER}")
-    os.makedirs(memory_folder, exist_ok=True)
-    buffer_file_path = f"{MEMORY_FOLDER}/buffer_{index}_iter_{iteration}.json"
-    metadata_file_path = f"{MEMORY_FOLDER}/metadata_{index}_iter_{iteration}.json"
-
-    buffer_to_save = {key: list(value) for key, value in buffer.items()}
-    with open(buffer_file_path, "w", encoding="utf-8") as f:
-        json.dump(buffer_to_save, f, ensure_ascii=False, indent=4)
-
-    metadata = {
-        "final_translations_record": final_translations_record
-    }
-    with open(metadata_file_path, "w", encoding="utf-8") as f:
-        json.dump(metadata, f, ensure_ascii=False, indent=4)
-
-    print(f"Buffer saved to {buffer_file_path}")
-    print(f"Metadata saved to {metadata_file_path}")
-
-
-def process_chunk():
-
-    data = pd.read_csv(csv_path)
-    for index, row in data.iterrows():
-        print("::::::::::::::::::::::: index :::::::::::::::::::::::", index, " ::::::::::::::::::::::: index :::::::::::::::::::::::", )
-        buffer = defaultdict(list)
-
-        source_sentence = row[src_lang].replace('\n', ' ')
-        source_segments = segment_sentences_by_punctuation(source_sentence, lang=src_lang)
-
-        for iteration in range(max_iterations):
-            print(f"\nStarting iteration {iteration + 1}/{max_iterations}...\n")
-
-            if iteration in stop_memory:
-                final_translations = final_translate_with_deepinfra(source_sentence, source_segments, buffer, SRC_LANGUAGE, TASK_LANGUAGE)
-                print("Final Translation Method:")
-                print(final_translations)
-                final_translations_record = [final_translations]
-                saving_memory(buffer, index, iteration, final_translations_record)
-
-            if iteration == max_iterations - 1:
-                break
-            else:
-                translations = translate_with_deepinfra(source_sentence, buffer, good_ref_contexts_num+iteration, SRC_LANGUAGE, TASK_LANGUAGE)
-
-            src_windows, mt_windows_list = generate_windows(source_sentence, translations)
-
-            ####################################### Evaluate translations and update buffer #######################################
-            print("Evaluate translations and update buffer ..............")
-
-            # First, store all sources and candidate translations as lists.
-            src_context_list = list(src_windows)
-            candidates_list = []
-            for window_index in range(len(src_windows)):
-                candidates = [mt_windows[window_index] for mt_windows in mt_windows_list]
-                candidates_list.append(candidates)
-
-            # Batch evaluate all candidate translations, returning the best translation and score for each source.
-            best_candidate_results = batch_rm_find_best_translation(list(zip(src_context_list, candidates_list)), TASK_LANGUAGE)
-
-            print("\n Our best candidate results:")
-            print(best_candidate_results)
-            print(" ------------------------------------------------------------------------ \n")
-
-            print("\n===== Initial buffer state =====")
-            for src, translations in buffer.items():
-                print(f"Source '{src}': {[t[0] for t in translations]}")
-
-            # Update the buffer for each source.
-            for i, src in enumerate(src_context_list):
-                best_tuple = best_candidate_results[i]  # (translation, score)
-                if best_tuple[0] is not None:
-                    # If the source is not yet in the buffer, initialize it.
-                    if src not in buffer:
-                        buffer[src] = [best_tuple]
-                        print(f"[ADD] New Source '{src}' Add Translation: '{best_tuple[0]}', Score: {best_tuple[1]}")
-                    else:
-                        # Directly add the new translation to the buffer.
-                        buffer[src].append(best_tuple)
-                        print(f"[ADD] Source '{src}' Add Translation: '{best_tuple[0]}', Score: {best_tuple[1]}")
-
-                    # Sort by score to place the best translation (highest score) at the top.
-                    buffer[src].sort(key=lambda x: x[1], reverse=True)
-                    print(f"[UPDATE] Source '{src}' Best Translation: '{buffer[src][0][0]}'")
-
-            print("\n===== Final buffer state =====")
-            for src, translations in buffer.items():
-                print(f"Source '{src}': {[t[0] for t in translations]}")
-
-
-        print("Final Translation:")
-        print(final_translations)
-
-
 def get_lang_and_nlp(language):
     if language not in lang_map:
         raise ValueError(f"Unsupported language: {language}")
     lang_code, model_name = lang_map[language]
     return lang_code, spacy.load(model_name)

-def translate_text(text, session_id,
+def translate_text(text, session_id, model, tokenizer, device,
                    src_language="Japanese",
                    task_language="English",
                    max_iterations_value=3,
@@ -699,14 +602,12 @@ def translate_text(text, session_id,
     final_translations = None

     for iteration in range(max_iterations):
-        # print(f"\nStarting iteration {iteration + 1}/{max_iterations}...\n")
         if iteration in stop_memory:
-            final_translations = final_translate_with_deepinfra(source_sentence, source_segments, buffer, SRC_LANGUAGE, TASK_LANGUAGE)
-            # saving_memory(buffer, 0, iteration, [final_translations])
+            final_translations = final_translate_with_deepinfra(model, tokenizer, device, source_sentence, source_segments, buffer, SRC_LANGUAGE, TASK_LANGUAGE)
         if iteration == max_iterations - 1:
             break
         else:
-            translations = translate_with_deepinfra(source_sentence, buffer, good_ref_contexts_num + iteration, SRC_LANGUAGE, TASK_LANGUAGE)
+            translations = translate_with_deepinfra(model, tokenizer, device, source_sentence, buffer, good_ref_contexts_num + iteration, SRC_LANGUAGE, TASK_LANGUAGE)

         src_windows, mt_windows_list = generate_windows(source_sentence, translations)
         # print("Evaluate translations and update buffer ..............")
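The iterate-then-finalize control flow is easier to see with the translation machinery stripped away. A minimal sketch with stand-in callables (rollout and finalize are illustrative names for translate_with_deepinfra and final_translate_with_deepinfra):

    def plan2align_loop(max_iterations, stop_memory, rollout, finalize):
        """Finalize at checkpoint iterations, otherwise keep sampling rollouts."""
        final = None
        for iteration in range(max_iterations):
            if iteration in stop_memory:
                final = finalize()               # rewrite using the current buffer
            if iteration == max_iterations - 1:
                break                            # final pass: no further rollouts
            else:
                candidates = rollout(iteration)  # sample new drafts, widening context
            # ... evaluate candidates and update the buffer here ...
        return final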
@@ -741,8 +642,4 @@ def translate_text(text, session_id,

     # print("Final Translation:")
     # print(final_translations)
-    return final_translations
-
-
-if __name__ == "__main__":
-    process_chunk()
+    return final_translations
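With process_chunk and the __main__ guard removed, plan2align.py is import-only and the caller owns the LLM. A hedged sketch of driving the refactored entry point directly (keyword defaults past max_iterations_value are left alone because the diff truncates the rest of the signature; the Llama checkpoint is gated and requires access approval):

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from vecalign.plan2align import translate_text

    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", torch_dtype=torch.float16
    )
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    result = translate_text(
        "お世話になっております。先日の件、ご確認いただけましたでしょうか。",
        "demo-session",            # session_id
        model, tokenizer, device,  # the caller now supplies the local LLM
        src_language="Japanese",
        task_language="English",
        max_iterations_value=3,
    )
    print(result)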