kgourgou committed
Commit e1724e4 · verified · 1 Parent(s): bf63770

Update app.py

Files changed (1)
  1. app.py +44 -21
app.py CHANGED
@@ -1,43 +1,66 @@
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM

- # Load your model (using GPT-2 as an example)
  model_name = "gpt2"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name)

- def generate_completions(prompt):
-     # Define decoding strategies with corresponding parameters
-     strategies = {
-         "Greedy": {"do_sample": False},
-         "Beam Search": {"num_beams": 5, "early_stopping": True},
-         "Top-k Sampling": {"do_sample": True, "top_k": 50},
-         "Top-p Sampling": {"do_sample": True, "top_p": 0.95}
      }

-     results = {}
-     input_ids = tokenizer.encode(prompt, return_tensors="pt")

-     for strategy, params in strategies.items():
-         # Generate output using the specific strategy
-         output_ids = model.generate(input_ids, max_length=50, **params)
-         output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-         results[strategy] = output_text

-     return results["Greedy"], results["Beam Search"], results["Top-k Sampling"], results["Top-p Sampling"]

- # Define the Gradio interface using the updated API
  interface = gr.Interface(
-     fn=generate_completions,
      inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here...", label="Prompt"),
      outputs=[
          gr.Textbox(label="Greedy"),
-         gr.Textbox(label="Beam Search"),
          gr.Textbox(label="Top-k Sampling"),
          gr.Textbox(label="Top-p Sampling"),
      ],
-     title="LLM Decoding Strategies Comparison",
-     description="Enter a prompt to see how different decoding strategies affect the output of a language model."
  )

  if __name__ == "__main__":
 
  import gradio as gr
+ import torch
+ import concurrent.futures
  from transformers import AutoTokenizer, AutoModelForCausalLM

+ # Load your model and tokenizer (using GPT-2 as an example)
  model_name = "gpt2"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name)

+ def generate_completion(prompt, strategy, params):
+     """Generate a complete answer using the specified decoding strategy."""
+     input_ids = tokenizer.encode(prompt, return_tensors="pt")
+     # Adjust generation parameters as needed.
+     output_ids = model.generate(input_ids, max_length=50, **params)
+     return tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+ def generate_all(prompt):
+     # Define decoding strategies and their corresponding parameters.
+     methods = {
+         "Greedy": {"params": {"do_sample": False}},
+         "Top-k Sampling": {"params": {"do_sample": True, "top_k": 50}},
+         "Top-p Sampling": {"params": {"do_sample": True, "top_p": 0.95}},
+         "Beam Search": {"params": {"num_beams": 5, "early_stopping": True}},
      }
+     # This list defines the order in which results are displayed.
+     method_order = ["Greedy", "Top-k Sampling", "Top-p Sampling", "Beam Search"]

+     # Dictionary to store the final answer for each method (initially None).
+     results = {method: None for method in methods}

+     # Yield an initial state so the UI shows placeholders.
+     yield tuple("Processing..." for _ in method_order)

+     # Use ThreadPoolExecutor to run each generation concurrently.
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+         future_to_method = {
+             executor.submit(generate_completion, prompt, method, methods[method]["params"]): method
+             for method in methods
+         }
+         # As soon as a method finishes, update its result and yield the current state.
+         for future in concurrent.futures.as_completed(future_to_method):
+             method = future_to_method[future]
+             try:
+                 result = future.result()
+             except Exception as exc:
+                 result = f"Error: {exc}"
+             results[method] = result
+             # Yield the results in the specified order; methods still processing show "Processing...".
+             yield tuple(results[m] if results[m] is not None else "Processing..." for m in method_order)

+ # Create a Gradio interface that uses the generator function.
  interface = gr.Interface(
+     fn=generate_all,
      inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here...", label="Prompt"),
      outputs=[
          gr.Textbox(label="Greedy"),
          gr.Textbox(label="Top-k Sampling"),
          gr.Textbox(label="Top-p Sampling"),
+         gr.Textbox(label="Beam Search"),
      ],
+     title="Decoding Methods Results",
+     description="Each decoding method's complete answer is printed as soon as it's done."
  )

  if __name__ == "__main__":
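
The core of this change is the streaming pattern: Gradio treats a generator fn as a streaming function and re-renders all outputs on every yield, while ThreadPoolExecutor plus as_completed delivers each decoding result as soon as it finishes. Below is a minimal, self-contained sketch of that pattern using only the standard library; slow_task and its timings are hypothetical stand-ins for model.generate and are not part of the committed app.

import time
import concurrent.futures

ORDER = ["Greedy", "Top-k Sampling", "Top-p Sampling", "Beam Search"]

def slow_task(method):
    # Hypothetical stand-in for a model.generate call of varying duration.
    time.sleep({"Greedy": 0.1, "Top-k Sampling": 0.3,
                "Top-p Sampling": 0.2, "Beam Search": 0.4}[method])
    return f"{method} done"

def stream_results():
    results = {m: None for m in ORDER}
    # Initial state: every output shows a placeholder.
    yield tuple("Processing..." for _ in ORDER)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(slow_task, m): m for m in ORDER}
        for future in concurrent.futures.as_completed(futures):
            results[futures[future]] = future.result()
            # Each yield is one UI update: finished methods show their
            # result, unfinished ones still show the placeholder.
            yield tuple(results[m] if results[m] is not None else "Processing..."
                        for m in ORDER)

for state in stream_results():
    print(state)

Running the generations in threads works here because each call is independent; with a single model on limited hardware you might prefer sequential generation, but the yield-per-completion structure stays the same.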