import io
import os

import requests
import streamlit as st
from PIL import Image

# Hugging Face Inference API configuration
api_token = os.environ.get("api_token")
API_URL_WH = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
API_URL = "https://api-inference.huggingface.co/models/ehristoforu/dalle-3-xl"
headers = {"Authorization": f"Bearer {api_token}"}

st.title("Realtime Text2Image Voice")

input_type = st.selectbox("Choose input type", ("Text", "Voice"))


def imquery(payload):
    """Send a JSON payload to the text-to-image model and return raw image bytes."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.content


def generate_image(prompt):
    """Generate an image from a text prompt."""
    return imquery({"inputs": prompt})


def transcribe(audio_bytes):
    """Send raw audio bytes to the Whisper model and return its JSON response."""
    response = requests.post(API_URL_WH, headers=headers, data=audio_bytes)
    return response.json()


if input_type == "Text":
    prompt = st.text_input("Enter prompt")
    if prompt:
        # Wrap the prompt in the {"inputs": ...} payload the Inference API expects.
        image_bytes = generate_image(prompt)
        st.image(Image.open(io.BytesIO(image_bytes)))
else:
    audio_file = st.file_uploader("Upload your audio file", type=["mp3", "m4a", "wav"])
    if audio_file:
        # st.file_uploader returns an UploadedFile, so read its bytes directly
        # instead of trying to open it by filename.
        transcription = transcribe(audio_file.read())
        # Whisper returns JSON such as {"text": "..."}; pass only the text on as the prompt.
        prompt = transcription.get("text", "")
        image_bytes = generate_image(prompt)
        st.image(Image.open(io.BytesIO(image_bytes)))
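
# Minimal local usage sketch (assumptions: the file is saved as app.py and you have
# a Hugging Face token with Inference API access exported as the "api_token" env var):
#
#   export api_token=<your Hugging Face token>
#   streamlit run app.py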