amiguel committed on
Commit 9a4c5ac · verified · 1 Parent(s): 36256aa

Update app.py

Files changed (1)
  app.py  +97 -54
app.py CHANGED
@@ -1,10 +1,13 @@
 import streamlit as st
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextStreamer
-from huggingface_hub import login
-import PyPDF2
-import pandas as pd
 import torch
+import pandas as pd
+import PyPDF2
+import pickle
+import os
+from transformers import AutoTokenizer
+from huggingface_hub import login
 import time
+from utils.ch09util import subsequent_mask  # Ensure ch09util.py is available
 
 # Device setup
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -17,7 +20,7 @@ st.set_page_config(
 )
 
 # Model name
-MODEL_NAME = "amiguel/custom-en2fr-transformer-v1"  # "Helsinki-NLP/opus-mt-en-fr"
+MODEL_NAME = "amiguel/custom-en2fr-transformer-v1"
 
 # Title with rocket emojis
 st.title("🚀 English to French Translator 🚀")
@@ -60,9 +63,9 @@ def process_file(uploaded_file):
         st.error(f"📄 Error processing file: {str(e)}")
         return ""
 
-# Model loading function
+# Custom model loading function
 @st.cache_resource
-def load_model(hf_token):
+def load_model_and_resources(hf_token):
     try:
         if not hf_token:
             st.error("🔐 Authentication required! Please provide a Hugging Face token.")
@@ -76,49 +79,86 @@ def load_model(hf_token):
             token=hf_token
         )
 
-        # Load the model with appropriate dtype for CPU/GPU compatibility
-        dtype = torch.float16 if DEVICE == "cuda" else torch.float32
-        model = AutoModelForSeq2SeqLM.from_pretrained(
+        # Load model
+        from transformers import PreTrainedModel, PretrainedConfig
+        class TransformerConfig(PretrainedConfig):
+            model_type = "custom_transformer"
+            def __init__(self, src_vocab_size, tgt_vocab_size, d_model=256, d_ff=1024, h=8, N=6, dropout=0.1, **kwargs):
+                super().__init__(**kwargs)
+                self.src_vocab_size = src_vocab_size
+                self.tgt_vocab_size = tgt_vocab_size
+                self.d_model = d_model
+                self.d_ff = d_ff
+                self.h = h
+                self.N = N
+                self.dropout = dropout
+
+        class CustomTransformer(PreTrainedModel):
+            config_class = TransformerConfig
+            def __init__(self, config):
+                super().__init__(config)
+                from utils.ch09util import create_model
+                self.model = create_model(
+                    config.src_vocab_size,
+                    config.tgt_vocab_size,
+                    N=config.N,
+                    d_model=config.d_model,
+                    d_ff=config.d_ff,
+                    h=config.h,
+                    dropout=config.dropout
+                )
+            def forward(self, src, tgt, src_mask, tgt_mask, **kwargs):
+                return self.model(src, tgt, src_mask, tgt_mask)
+
+        config = TransformerConfig.from_pretrained(MODEL_NAME, token=hf_token)
+        model = CustomTransformer.from_pretrained(
             MODEL_NAME,
-            token=hf_token,
-            torch_dtype=dtype,
-            device_map="auto"  # Automatically maps to CPU or GPU
-        )
+            config=config,
+            token=hf_token
+        ).to(DEVICE)
 
-        return model, tokenizer
+        # Load dictionaries (assumes dict.p was uploaded to the model repo)
+        dict_path = "dict.p"
+        if not os.path.exists(dict_path):
+            st.error("Dictionary file (dict.p) not found. Please ensure it was uploaded to the model repository.")
+            return None
+        with open(dict_path, "rb") as fb:
+            en_word_dict, en_idx_dict, fr_word_dict, fr_idx_dict = pickle.load(fb)
+
+        return model, tokenizer, en_word_dict, fr_word_dict, en_idx_dict, fr_idx_dict
 
     except Exception as e:
         st.error(f"🤖 Model loading failed: {str(e)}")
         return None
 
-# Generation function for translation with streaming
-def generate_translation(input_text, model, tokenizer):
+# Custom streaming generation function
+def custom_streaming_generate(input_text, model, tokenizer, en_word_dict, fr_word_dict, fr_idx_dict):
     try:
-        # Tokenize the input (no prompt needed for seq2seq translation models)
-        inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
-        inputs = inputs.to(DEVICE)
-
-        # Set up the streamer for real-time output
-        streamer = TextStreamer(tokenizer, skip_special_tokens=True)
-
-        # Generate translation with streaming (disable beam search)
         model.eval()
-        with torch.no_grad():
-            outputs = model.generate(
-                input_ids=inputs["input_ids"],
-                attention_mask=inputs["attention_mask"],
-                max_length=512,
-                num_beams=1,  # Set to 1 to disable beam search for streaming
-                length_penalty=1.0,
-                early_stopping=True,
-                streamer=streamer,
-                return_dict_in_generate=True,
-                output_scores=True
-            )
-
-        # Decode the full output for storage and metrics
-        translation = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
-        return translation, streamer
+        PAD, UNK = 0, 1
+        tokenized_en = ["BOS"] + tokenizer.tokenize(input_text) + ["EOS"]
+        enidx = [en_word_dict.get(i, UNK) for i in tokenized_en]
+        src = torch.tensor(enidx).long().to(DEVICE).unsqueeze(0)
+        src_mask = (src != 0).unsqueeze(-2)
+        memory = model.model.encode(src, src_mask)
+        start_symbol = fr_word_dict["BOS"]
+        ys = torch.ones(1, 1).fill_(start_symbol).type_as(src.data)
+        for _ in range(100):
+            out = model.model.decode(memory, src_mask, ys, subsequent_mask(ys.size(1)).type_as(src.data))
+            prob = model.model.generator(out[:, -1])
+            _, next_word = torch.max(prob, dim=1)
+            next_word = next_word.data[0]
+            sym = fr_idx_dict.get(next_word, "UNK")
+            if sym != "EOS":
+                token = sym.replace("</w>", " ")
+                for x in '''?:;.,'("-!&)%''':
+                    token = token.replace(f" {x}", f"{x}")
+                yield token
+            else:
+                break
+            ys = torch.cat([ys, torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=1)
+        # Yield a final empty token to ensure completion
+        yield ""
 
     except Exception as e:
         raise Exception(f"Generation error: {str(e)}")
@@ -139,17 +179,22 @@ if prompt := st.chat_input("Enter text to translate into French..."):
         st.error("🔑 Authentication required!")
         st.stop()
 
-    # Load model if not already loaded
+    # Load model and resources if not already loaded
    if "model" not in st.session_state:
-        model_data = load_model(hf_token)
+        model_data = load_model_and_resources(hf_token)
        if model_data is None:
            st.error("Failed to load model. Please check your token and try again.")
            st.stop()
 
-        st.session_state.model, st.session_state.tokenizer = model_data
+        st.session_state.model, st.session_state.tokenizer, \
+        st.session_state.en_word_dict, st.session_state.fr_word_dict, \
+        st.session_state.en_idx_dict, st.session_state.fr_idx_dict = model_data
 
    model = st.session_state.model
    tokenizer = st.session_state.tokenizer
+    en_word_dict = st.session_state.en_word_dict
+    fr_word_dict = st.session_state.fr_word_dict
+    fr_idx_dict = st.session_state.fr_idx_dict
 
    # Add user message
    with st.chat_message("user", avatar=USER_AVATAR):
@@ -170,21 +215,19 @@ if prompt := st.chat_input("Enter text to translate into French..."):
        response_container = st.empty()
        full_response = ""
 
-        # Generate translation and stream output
-        translation, streamer = generate_translation(input_text, model, tokenizer)
-
-        # Streamlit will automatically display the streamed output via the TextStreamer
-        # Collect the full response for metrics and storage
-        full_response = translation
-
-        # Update the placeholder with the final response
-        response_container.markdown(full_response)
+        # Stream translation tokens
+        for token in custom_streaming_generate(
+            input_text, model, tokenizer, en_word_dict, fr_word_dict, fr_idx_dict
+        ):
+            if token:  # Only append non-empty tokens
+                full_response += token
+                response_container.markdown(full_response)
 
        # Calculate performance metrics
        end_time = time.time()
        input_tokens = len(tokenizer(input_text)["input_ids"])
        output_tokens = len(tokenizer(full_response)["input_ids"])
-        speed = output_tokens / (end_time - start_time)
+        speed = output_tokens / (end_time - start_time) if (end_time - start_time) > 0 else 0
 
        # Calculate costs (hypothetical pricing model)
        input_cost = (input_tokens / 1000000) * 5  # $5 per million input tokens
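
For context, below is a minimal standalone sketch of the greedy decoding loop this commit introduces, collected into an ordinary function so it can be exercised outside Streamlit. It is an illustration, not part of the commit: it assumes the objects returned by load_model_and_resources and the subsequent_mask helper from the repo's utils/ch09util.py, and the name greedy_translate is hypothetical. Note the .item() call, which converts the predicted index tensor to a plain int before the fr_idx_dict lookup.

import torch
from utils.ch09util import subsequent_mask  # repo helper, assumed available

def greedy_translate(text, model, tokenizer, en_word_dict,
                     fr_word_dict, fr_idx_dict, max_len=100, device="cpu"):
    # Hypothetical helper mirroring custom_streaming_generate from the diff.
    UNK = 1  # index the pickled dictionaries reserve for unknown tokens
    tokens = ["BOS"] + tokenizer.tokenize(text) + ["EOS"]
    src = torch.tensor([[en_word_dict.get(t, UNK) for t in tokens]],
                       dtype=torch.long, device=device)
    src_mask = (src != 0).unsqueeze(-2)  # mask out padding positions
    pieces = []
    with torch.no_grad():
        memory = model.model.encode(src, src_mask)  # encode the source once
        ys = torch.full((1, 1), fr_word_dict["BOS"],
                        dtype=torch.long, device=device)
        for _ in range(max_len):
            tgt_mask = subsequent_mask(ys.size(1)).type_as(src)
            out = model.model.decode(memory, src_mask, ys, tgt_mask)
            prob = model.model.generator(out[:, -1])  # scores over the French vocab
            next_word = prob.argmax(dim=1).item()     # plain int, usable as a dict key
            sym = fr_idx_dict.get(next_word, "UNK")
            if sym == "EOS":
                break
            pieces.append(sym.replace("</w>", " "))   # undo the BPE end-of-word marker
            ys = torch.cat([ys, torch.full((1, 1), next_word,
                                           dtype=torch.long, device=device)], dim=1)
    return "".join(pieces).strip()

Unlike the committed generator, this sketch wraps decoding in torch.no_grad(); model.eval() alone switches layer behavior such as dropout but does not disable gradient tracking.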