ahmed-eisa committed
Commit c0a983b · 1 Parent(s): cb38255

added audio generation
Files changed (7):
  1. audio_chat_client.py +26 -0
  2. client.py +27 -0
  3. main.py +22 -3
  4. models.py +21 -1
  5. requirements.txt +2 -1
  6. schemas.py +3 -0
  7. utils.py +9 -0
audio_chat_client.py ADDED
@@ -0,0 +1,26 @@
+import requests
+import streamlit as st
+# from streamlitui import StreamlitUI
+
+# stui = StreamlitUI(api_url="http://localhost:8000")  # FastAPI backend URL
+st.title("FastAPI ChatBot")
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        content = message["content"]
+        if isinstance(content, bytes):
+            st.audio(content)  # audio replies are stored as raw WAV bytes
+        else:
+            st.markdown(content)
+
+
+if prompt := st.chat_input("Write your prompt in this input field"):
+    response = requests.get(
+        "https://ahmed-eisa-genai-service.hf.space/generate/audio", params={"prompt": prompt}
+    )
+    response.raise_for_status()
+    with st.chat_message("assistant"):
+        st.text("Here is your generated audio")
+        st.audio(response.content)
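For reference, a minimal sketch of calling the audio endpoint outside Streamlit; the URL comes from the diff above, while the prompt, output filename, and timeout are illustrative assumptions.

import requests

response = requests.get(
    "https://ahmed-eisa-genai-service.hf.space/generate/audio",
    params={"prompt": "Hello from FastAPI"},  # hypothetical prompt
    timeout=300,  # assumption: Bark generation can take minutes on CPU
)
response.raise_for_status()
with open("output.wav", "wb") as f:  # hypothetical output path
    f.write(response.content)  # the endpoint streams raw WAV bytes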
client.py ADDED
@@ -0,0 +1,27 @@
+import requests
+import streamlit as st
+# from streamlitui import StreamlitUI
+
+# stui = StreamlitUI(api_url="http://localhost:8000")  # FastAPI backend URL
+st.title("FastAPI ChatBot")
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+if prompt := st.chat_input("Write your prompt in this input field"):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    with st.chat_message("user"):
+        st.text(prompt)
+
+    response = requests.get(
+        "https://ahmed-eisa-genai-service.hf.space/generate/text", params={"prompt": prompt}
+    )
+    response.raise_for_status()
+
+    with st.chat_message("assistant"):
+        st.markdown(response.text)
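Similarly, a minimal sketch of querying the text endpoint without the Streamlit UI (the prompt is illustrative):

import requests

response = requests.get(
    "https://ahmed-eisa-genai-service.hf.space/generate/text",
    params={"prompt": "How to set up a FastAPI project?"},
)
response.raise_for_status()
print(response.text)  # the endpoint returns the model's markdown output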
main.py CHANGED
@@ -1,7 +1,10 @@
 # main.py
-from fastapi import FastAPI
-from models import load_text_model,generate_text
+from fastapi import FastAPI, status
+from fastapi.responses import StreamingResponse
 
+from models import load_text_model, generate_text, load_audio_model, generate_audio
+from schemas import VoicePresets
+from utils import audio_array_to_buffer
 app = FastAPI()
 
 @app.get("/")
@@ -12,4 +15,20 @@ def root_controller():
 def serve_language_model_controller(prompt: str) -> str:
     pipe = load_text_model()
     output = generate_text(pipe, prompt)
-    return output
+    return output
+
+
+@app.get(
+    "/generate/audio",
+    responses={status.HTTP_200_OK: {"content": {"audio/wav": {}}}},
+    response_class=StreamingResponse,
+)
+def serve_text_to_audio_model_controller(
+    prompt: str,
+    preset: VoicePresets = "v2/en_speaker_1",
+):
+    processor, model = load_audio_model()
+    output, sample_rate = generate_audio(processor, model, prompt, preset)
+    return StreamingResponse(
+        audio_array_to_buffer(output, sample_rate), media_type="audio/wav"
+    )
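A hedged sketch of exercising the new endpoint with FastAPI's TestClient; the test name is an assumption, and the call loads the real Bark model, so treat it as an integration check rather than a unit test.

from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

def test_generate_audio_returns_wav():  # hypothetical test, not part of the commit
    response = client.get("/generate/audio", params={"prompt": "hello"})
    assert response.status_code == 200
    assert response.headers["content-type"] == "audio/wav"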
models.py CHANGED
@@ -1,7 +1,9 @@
 # models.py
 
 import torch
-from transformers import Pipeline, pipeline
+from transformers import Pipeline, pipeline, AutoProcessor, AutoModel, BarkProcessor, BarkModel
+from schemas import VoicePresets
+import numpy as np
 
 prompt = "How to set up a FastAPI project?"
 system_prompt = """
@@ -12,6 +14,24 @@ Always respond in markdown.
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+def load_audio_model() -> tuple[BarkProcessor, BarkModel]:
+    processor = AutoProcessor.from_pretrained("suno/bark-small")
+    # from_pretrained takes no device kwarg; move the model explicitly
+    model = AutoModel.from_pretrained("suno/bark-small").to(device)
+    return processor, model
+
+def generate_audio(
+    processor: BarkProcessor,
+    model: BarkModel,
+    prompt: str,
+    preset: VoicePresets,
+) -> tuple[np.ndarray, int]:
+    # keep the inputs on the same device as the model
+    inputs = processor(text=[prompt], return_tensors="pt", voice_preset=preset).to(device)
+    output = model.generate(**inputs, do_sample=True).cpu().numpy().squeeze()
+    sample_rate = model.generation_config.sample_rate
+    return output, sample_rate
+
+
 def load_text_model():
     pipe = pipeline(
         "text-generation",
requirements.txt CHANGED
@@ -3,4 +3,5 @@ uvicorn
 transformers
 torch
 pydantic
-bitsandbytes
+bitsandbytes
+soundfile
schemas.py ADDED
@@ -0,0 +1,3 @@
+from typing import Literal
+
+VoicePresets = Literal["v2/en_speaker_1", "v2/en_speaker_9"]
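Because VoicePresets is a Literal, FastAPI validates the preset query parameter against these two values and rejects anything else with a 422; a quick illustrative check of the allowed values:

from typing import get_args

from schemas import VoicePresets

print(get_args(VoicePresets))  # ('v2/en_speaker_1', 'v2/en_speaker_9')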
utils.py ADDED
@@ -0,0 +1,9 @@
+from io import BytesIO
+import soundfile
+import numpy as np
+
+def audio_array_to_buffer(audio_array: np.ndarray, sample_rate: int) -> BytesIO:
+    buffer = BytesIO()
+    soundfile.write(buffer, audio_array, sample_rate, format="wav")
+    buffer.seek(0)  # rewind so StreamingResponse reads from the start
+    return buffer
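A small round-trip sketch to confirm the buffer holds a readable WAV stream; the tone, length, and sample rate are arbitrary test values, not part of the commit.

import numpy as np
import soundfile

from utils import audio_array_to_buffer

tone = np.sin(2 * np.pi * 440 * np.linspace(0, 1, 24_000))  # 1 s, 440 Hz sine
buffer = audio_array_to_buffer(tone, sample_rate=24_000)
data, rate = soundfile.read(buffer)
assert rate == 24_000 and len(data) == 24_000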