Spaces:

rajeshthangaraj1
/

testgen

Runtime error

App Files Files Community

rajeshthangaraj1 committed on Oct 24, 2024

Commit

d3218e7

verified ·

1 Parent(s): 36186cd

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -16

app.py CHANGED Viewed

@@ -1,23 +1,56 @@
 import gradio as gr
-from transformers import pipeline
-# Load the GPT-2 model
-generator = pipeline('text-generation', model='gpt2')
-def generate_text(prompt):
-    # Generate text based on the input prompt
-    results = generator(prompt, max_length=100, num_return_sequences=1)
-    return results[0]['generated_text']
 # Set up the Gradio interface
-# Updated to use gr.Textbox directly instead of gr.inputs.Textbox
-interface = gr.Interface(
-    fn=generate_text,
-    inputs=gr.Textbox(lines=2, placeholder="Type something here..."),
-    outputs='text',
-    title="Simple Generative AI",
-    description="Type in a prompt and get a continuation from GPT-2!"
 )
-if __name__ == "__main__":
-    interface.launch()

+import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBytesConfig
 import gradio as gr
+from google.colab import userdata
+# Set up the model and tokenizer
+MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+os.environ['HF_TOKEN']=os.environ.get('HF_TOKEN')
+os.environ['HUGGINGFACEHUB_API_TOKEN']=os.environ.get('HF_TOKEN')
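+# Configure 4-bit NF4 quantization via bitsandbytes.
+# NOTE(review): bitsandbytes 4-bit loading has traditionally required a CUDA
+# GPU -- confirm this configuration actually works on CPU-only hardware.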
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+# Load the model with quantization
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    quantization_config=quantization_config
+)
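+# Intended target device. NOTE: `device` is not used in the code shown here
+# (the model.to(device) call below is commented out).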
+device = "cpu"
+# model.to(device)
+# Define the function for the Gradio interface
+def chat_with_phi(message):
+    conversation = [{"role": "user", "content": message}]
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+    )
+    response = pipe(conversation)
+    return response[0]['generated_text']
 # Set up the Gradio interface
+app = gr.Interface(
+    fn=chat_with_phi,
+    inputs=gr.Textbox(label="Type your message:"),
+    outputs=gr.Textbox(label="Phi 3.5 Responds:"),
+    title="Phi 3.5 Text Chat",
+    description="Chat with Phi 3.5 model. Ask anything!",
+    theme="huggingface"
 )
+# Launch the app
+app.launch(debug=True)