Update src/translate/Translate.py
src/translate/Translate.py CHANGED
@@ -78,6 +78,8 @@ def gemma_direct(requestValue: str, model: str = 'Gargaz/gemma-2b-romanian-better'):
     # limit max_new_tokens to 150% of the requestValue
     prompt = f"Translate this text to Romanian: {requestValue}"
 
+    tokenizer = AutoTokenizer.from_pretrained("Gargaz/gemma-2b-romanian-better")
+    model = AutoModelForCausalLM.from_pretrained("Gargaz/gemma-2b-romanian-better").to(device)
     input_ids = tokenizer.encode(requestValue, add_special_tokens=True)
     num_tokens = len(input_ids)
     # Estimate output length (e.g., 50% longer)
@@ -85,8 +87,6 @@ def gemma_direct(requestValue: str, model: str = 'Gargaz/gemma-2b-romanian-better'):
     max_new_tokens += max_new_tokens % 2  # ensure it's even
 
     messages = [{"role": "user", "content": prompt}]
-    tokenizer = AutoTokenizer.from_pretrained("Gargaz/gemma-2b-romanian-better")
-    model = AutoModelForCausalLM.from_pretrained("Gargaz/gemma-2b-romanian-better").to(device)
 
     inputs = tokenizer.apply_chat_template(
         messages,
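
This commit moves the tokenizer and model loading above the first use of `tokenizer`; in the old order, `tokenizer.encode` ran before `tokenizer` was assigned, which raises a NameError. For context, a minimal sketch of the reordered function: the imports, the `device` definition, the `max_new_tokens` line hidden between the two hunks, and the final generate/decode step are not shown in this diff and are assumptions here, reconstructed from the surrounding comments.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"  # assumption: defined elsewhere in Translate.py

def gemma_direct(requestValue: str, model: str = 'Gargaz/gemma-2b-romanian-better'):
    # limit max_new_tokens to 150% of the requestValue
    prompt = f"Translate this text to Romanian: {requestValue}"

    # Load before the first use of `tokenizer` (the reordering this commit makes).
    # Note: assigning `model` here shadows the string parameter of the same name.
    tokenizer = AutoTokenizer.from_pretrained("Gargaz/gemma-2b-romanian-better")
    model = AutoModelForCausalLM.from_pretrained("Gargaz/gemma-2b-romanian-better").to(device)

    input_ids = tokenizer.encode(requestValue, add_special_tokens=True)
    num_tokens = len(input_ids)
    # Estimate output length (e.g., 50% longer); this line sits between the two
    # hunks and is reconstructed here from the "150%" comment above.
    max_new_tokens = int(num_tokens * 1.5)
    max_new_tokens += max_new_tokens % 2  # ensure it's even (adds 1 if odd, 0 if even)

    messages = [{"role": "user", "content": prompt}]

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # assumption: typical chat-template usage
        return_tensors="pt",
    ).to(device)

    # Illustrative generation and decoding; not part of the shown hunks.
    output_ids = model.generate(inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0][inputs.shape[-1]:], skip_special_tokens=True)

Usage, e.g.: print(gemma_direct("Good morning!"))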