# Spaces: Sleeping
import tempfile

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from TTS.api import TTS
# Text generation: tokenizer and causal LM for "microsoft/phi-2".
# The target device is resolved first so the precision choice and the model
# placement both follow from it (float16 on CUDA, float32 on CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
text_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
def generate_text(prompt, max_new_tokens=512):
    """Continue ``prompt`` with the phi-2 language model and return the text.

    Args:
        prompt: Text to continue. ``None``, empty, or whitespace-only prompts
            short-circuit to an empty string without touching the model.
        max_new_tokens: Upper bound on the number of tokens generated *in
            addition to* the prompt.

    Returns:
        The decoded sequence (prompt followed by the continuation) with
        special tokens stripped, or ``""`` when there is nothing to continue.
    """
    if not prompt or not prompt.strip():
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = text_model.generate(
        **inputs,
        # Bound the *new* tokens rather than the total length. The previous
        # max_length=10000 counted the prompt too and is far beyond what the
        # model can attend to (phi-2's model card lists a 2048-token context
        # -- verify), so a run that never emitted EOS could go on for minutes.
        max_new_tokens=max_new_tokens,
        # Passing the pad id explicitly avoids relying on generate()'s
        # fallback when the tokenizer defines no pad token.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Load Image Generation Model (Stable Diffusion)
# Half-precision weights are requested only when CUDA is available; otherwise
# the default float32 weights are loaded.
# NOTE(review): confirm that the "runwayml/stable-diffusion-v1-5" repo id still
# resolves on the Hub before deploying.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    # `variant` is the supported way to pick the fp16 weight files; selecting
    # them through revision="fp16" is deprecated in diffusers.
    variant="fp16" if torch.cuda.is_available() else None,
)
pipe = pipe.to(device)
def generate_image(prompt):
    """Run the Stable Diffusion pipeline on ``prompt`` and return the first image it produces."""
    return pipe(prompt).images[0]
# Text-to-speech: Coqui TTS with the "tts_models/en/ljspeech/tacotron2-DDC"
# model; the download progress bar is turned off.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
)
def generate_audio(prompt):
    """Synthesize ``prompt`` to a WAV file and return the path of that file.

    Args:
        prompt: Text to speak.

    Returns:
        Path of the newly written ``.wav`` file, or ``None`` when ``prompt``
        is ``None``, empty, or whitespace-only (nothing is synthesized).
    """
    if not prompt or not prompt.strip():
        return None

    # Every call gets its own file. A single fixed "output.wav" is shared by
    # all requests, so two users generating at once would overwrite each
    # other's audio. delete=False because the file has to outlive this
    # function for the caller to read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        output_path = wav_file.name

    tts.tts_to_file(text=prompt, file_path=output_path)
    return output_path
# Gradio Interface
# One tab per generator; each tab wires a prompt box and a button to the
# matching generate_* function and shows its result in an output component.
# NOTE(review): the emoji in the heading and tab labels were restored from
# mis-encoded text; the rocket and palette are recoverable from the garbled
# bytes, the text/audio glyphs are a best guess -- confirm the intended ones.
with gr.Blocks(title="Charriot_v2") as demo:
    gr.Markdown("# 🚀 Charriot_v2: Text, Image, and Audio Generator")

    with gr.Tab("📝 Text Generation"):
        text_input = gr.Textbox(label="Enter Prompt")
        text_output = gr.Textbox(label="Generated Text")
        text_btn = gr.Button("Generate Text")
        text_btn.click(fn=generate_text, inputs=text_input, outputs=text_output)

    with gr.Tab("🎨 Image Generation"):
        img_prompt = gr.Textbox(label="Enter Prompt")
        img_output = gr.Image(label="Generated Image")
        img_btn = gr.Button("Generate Image")
        img_btn.click(fn=generate_image, inputs=img_prompt, outputs=img_output)

    with gr.Tab("🔊 Audio Generation"):
        audio_prompt = gr.Textbox(label="Enter Prompt")
        audio_output = gr.Audio(label="Generated Audio")
        audio_btn = gr.Button("Generate Audio")
        audio_btn.click(fn=generate_audio, inputs=audio_prompt, outputs=audio_output)

demo.launch()