kevalfst committed on
Commit
556d852
·
verified ·
1 Parent(s): 39c1d2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -40
app.py CHANGED
@@ -1,42 +1,49 @@
1
- import torch
2
- from diffusers import StableDiffusionPipeline
3
  import gradio as gr
4
-
5
- device = "cuda" if torch.cuda.is_available() else "cpu"
6
-
7
- # Load Stable Diffusion v1.4 from Hugging Face
8
- pipe = StableDiffusionPipeline.from_pretrained(
9
- "CompVis/stable-diffusion-v1-4",
10
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
11
- use_safetensors=True
12
- )
13
- pipe = pipe.to(device)
14
-
15
- # Inference function
16
- def generate(prompt, guidance, steps, width, height):
17
- image = pipe(prompt=prompt, guidance_scale=guidance, num_inference_steps=steps, height=height, width=width).images[0]
18
- return image
19
-
20
- # Gradio UI
21
- title = "🎨 Offline Text-to-Image Generator (Stable Diffusion v1.4)"
22
- description = "Generate images from text prompts using a fully self-hosted Stable Diffusion model."
23
-
24
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="pink")) as demo:
25
- gr.Markdown(f"# {title}")
26
- gr.Markdown(description)
27
-
 
 
 
 
 
 
 
 
28
  with gr.Row():
29
- with gr.Column():
30
- prompt = gr.Textbox(label="Enter your prompt", placeholder="A steampunk dragon flying over a futuristic city")
31
- guidance = gr.Slider(1, 20, value=7.5, step=0.5, label="Guidance Scale")
32
- steps = gr.Slider(10, 100, value=30, step=5, label="Inference Steps")
33
- width = gr.Slider(256, 768, value=512, step=64, label="Image Width")
34
- height = gr.Slider(256, 768, value=512, step=64, label="Image Height")
35
- submit = gr.Button("Generate Image")
36
-
37
- with gr.Column():
38
- output = gr.Image(label="Generated Image")
39
-
40
- submit.click(fn=generate, inputs=[prompt, guidance, steps, width, height], outputs=output)
41
-
42
- demo.launch()
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import os
5
+
6
# Hugging Face Hub checkpoint that backs this demo.
model_name = "Qwen/Qwen2.5-Omni-3B"

# Tokenizer is loaded from the same checkpoint as the weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are loaded in bfloat16, with device placement delegated to
# device_map="auto".
# NOTE(review): confirm AutoModelForCausalLM can resolve this checkpoint; the
# Qwen2.5-Omni model card may call for a dedicated model/processor class.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
10
+
11
+ # Function to process inputs and generate response
12
def process_input(text_input, image_input=None, audio_input=None):
    """Generate a text reply for the submitted prompt.

    Args:
        text_input: Prompt text from the textbox.
        image_input: Optional uploaded image. It is collected but not yet
            passed to the model.
        audio_input: Optional uploaded audio. It is collected but not yet
            passed to the model.

    Returns:
        A ``(response_text, response_audio)`` tuple. ``response_audio`` is
        always ``None`` because speech generation is not implemented here.
    """
    inputs = {"text": text_input}
    # Compare against None explicitly: an uploaded image can arrive as a
    # NumPy array, and truth-testing a multi-element array raises ValueError.
    if image_input is not None:
        inputs["image"] = image_input
    if audio_input is not None:
        inputs["audio"] = audio_input

    # A blank prompt gives the model nothing to continue from, so skip
    # generation instead of handing generate() an empty sequence.
    if not text_input or not text_input.strip():
        return "", None

    # Tokenize the text only (simplified for demo). Calling the tokenizer
    # directly also yields the attention mask that generate() expects.
    encoded = tokenizer(inputs["text"], return_tensors="pt").to(model.device)

    # max_new_tokens bounds the reply itself; max_length would also count the
    # prompt tokens and leave no room to answer a long prompt.
    outputs = model.generate(**encoded, max_new_tokens=200)
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Placeholder for speech generation (requires additional setup).
    response_audio = None

    return response_text, response_audio
30
+
31
# Gradio interface: one row of inputs, a submit button, and two outputs.
# NOTE(review): the listing this was taken from had lost its indentation, so
# the nesting below (three inputs inside the Row, everything else directly
# under Blocks) is a reconstruction; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Qwen2.5-Omni-3B Demo")

    with gr.Row():
        text_input = gr.Textbox(label="Text Input")
        image_input = gr.Image(label="Upload Image")
        audio_input = gr.Audio(label="Upload Audio")

    submit_button = gr.Button("Submit")
    text_output = gr.Textbox(label="Text Response")
    audio_output = gr.Audio(label="Audio Response")

    # Route the three inputs through process_input into the two outputs.
    submit_button.click(
        fn=process_input,
        inputs=[text_input, image_input, audio_input],
        outputs=[text_output, audio_output],
    )

# Start the app.
demo.launch()