Spaces:
Sleeping
Sleeping
File size: 2,381 Bytes
d50bc01 338367b d50bc01 338367b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import tempfile

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from TTS.api import TTS
# Text generation backend (microsoft/phi-2), loaded once at import time.
# The device is resolved first; half precision is used only when that device is CUDA.
_PHI2_CHECKPOINT = "microsoft/phi-2"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
_text_dtype = torch.float16 if device.type == "cuda" else torch.float32
tokenizer = AutoTokenizer.from_pretrained(_PHI2_CHECKPOINT)
text_model = AutoModelForCausalLM.from_pretrained(
    _PHI2_CHECKPOINT,
    torch_dtype=_text_dtype,
)
text_model.to(device)
def generate_text(prompt, max_new_tokens=256):
    """Continue *prompt* with the loaded causal language model.

    Args:
        prompt: Text to continue. ``None``, empty, or whitespace-only input
            yields an empty string without touching the model.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. It is reduced further when the prompt leaves less
            room than this in the model's context window.

    Returns:
        The decoded sequence (prompt followed by its continuation) with
        special tokens removed, or ``""`` for an empty prompt.

    Raises:
        ValueError: If the prompt alone already fills the context window.
    """
    if not prompt or not prompt.strip():
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Keep prompt + continuation within the positions the model supports.
    # Fallback of 2048 is the context length published for phi-2.
    context_limit = getattr(text_model.config, "max_position_embeddings", 2048)
    prompt_length = inputs["input_ids"].shape[-1]
    budget = min(max_new_tokens, context_limit - prompt_length)
    if budget <= 0:
        raise ValueError(
            f"Prompt is {prompt_length} tokens long; the model accepts at most "
            f"{context_limit} tokens including the generated text."
        )

    outputs = text_model.generate(
        **inputs,
        # max_new_tokens bounds only the continuation, unlike max_length,
        # which also counts the prompt tokens.
        max_new_tokens=budget,
        # Pass the padding id explicitly instead of relying on generate()'s
        # fallback to the end-of-sequence token.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Load Image Generation Model (Stable Diffusion)
# Half-precision weights are requested only when CUDA is available; otherwise
# the default float32 weights are loaded. The fp16 files are selected with
# `variant="fp16"` because loading them via `revision="fp16"` is deprecated
# in diffusers.
# NOTE(review): `variant` requires a diffusers release that supports it -
# confirm against the version pinned for this app.
_use_fp16 = torch.cuda.is_available()
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if _use_fp16 else torch.float32,
    variant="fp16" if _use_fp16 else None,
)
pipe = pipe.to(device)
def generate_image(prompt):
    """Run the image pipeline on *prompt* and return the first image it produces."""
    result = pipe(prompt)
    return result.images[0]
# Text-to-speech backend (Coqui TTS), created once at import time with the
# download progress bar turned off.
_TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(progress_bar=False, model_name=_TTS_MODEL_NAME)
def generate_audio(prompt):
    """Synthesize *prompt* to a WAV file and return that file's path.

    Every call writes to its own temporary file, so simultaneous requests
    cannot overwrite each other's audio the way a single shared output
    file would.

    Args:
        prompt: Text to speak. ``None``, empty, or whitespace-only input is
            not sent to the synthesizer.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when there is no
        text to synthesize.
    """
    if not prompt or not prompt.strip():
        return None

    # delete=False: the file has to outlive this function so the caller can
    # read it. The handle is closed before synthesis so the TTS library can
    # reopen the path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(text=prompt, file_path=output_path)
    return output_path
# Gradio Interface
# One tab per generator. Each tab wires a prompt textbox and a button to the
# matching generate_* function and shows the result in an output component.
with gr.Blocks(title="Charriot_v2") as demo:
    gr.Markdown("# 🚀 Charriot_v2: Text, Image, and Audio Generator")

    with gr.Tab("📝 Text Generation"):
        text_input = gr.Textbox(label="Enter Prompt")
        text_output = gr.Textbox(label="Generated Text")
        text_btn = gr.Button("Generate Text")
        text_btn.click(fn=generate_text, inputs=text_input, outputs=text_output)

    with gr.Tab("🎨 Image Generation"):
        img_prompt = gr.Textbox(label="Enter Prompt")
        img_output = gr.Image(label="Generated Image")
        img_btn = gr.Button("Generate Image")
        img_btn.click(fn=generate_image, inputs=img_prompt, outputs=img_output)

    with gr.Tab("🔊 Audio Generation"):
        audio_prompt = gr.Textbox(label="Enter Prompt")
        audio_output = gr.Audio(label="Generated Audio")
        audio_btn = gr.Button("Generate Audio")
        audio_btn.click(fn=generate_audio, inputs=audio_prompt, outputs=audio_output)

# Start the web server for the assembled interface.
demo.launch()
|