puzan789 committed on
Commit
2d31940
·
0 Parent(s):
.gitignore ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environment
7
+ venv/
8
+ .env/
9
+ *.env
10
+
11
+ # FastAPI
12
+ *.log
13
+ *.db
14
+ instance/
15
+
16
+ # VSCode settings
17
+ .vscode/
18
+
19
+ # PyCharm settings
20
+ .idea/
21
+
22
+ # MyPy and other Python checks
23
+ .mypy_cache/
24
+ .pytest_cache/
25
+
26
+ # Jupyter Notebooks
27
+ .ipynb_checkpoints/
28
+
29
+ # Docker
30
+ *.container
31
+ .dockerignore
32
+ docker-compose.override.yml
33
+
34
+ # Cache and temp files
35
+ *.swp
36
+ *.swo
37
+
38
+ # Ignore compiled Cython files
39
+ *.c
40
+ *.so
41
+
42
+ # Ignore test coverage reports
43
+ .coverage
44
+ htmlcov/
45
+
46
+ # Ignore build directories
47
+ build/
48
+ dist/
49
+ *.egg-info/
50
+
51
+ # Ignore database files
52
+ *.sqlite3
53
+ *.db
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Osama
3
+ emoji: 🕵️
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from src.for_streamlit.spt import SpeechToText
3
+ from src.for_streamlit.texttotext import ConversationHandler
4
+ from src.for_streamlit.texttospeech import TextToSpeech
5
+ from streamlit_mic_recorder import mic_recorder
6
+
7
# Page header: title and a short usage hint shown above the recorder widget.
+ st.title("🎙️ Voice to Voice ")
8
+ st.write("Click the button below to start recording.")
9
+
10
+ # Cache the models to prevent reloading
11
@st.cache_resource
def load_speech_to_text():
    """Create the speech-to-text wrapper; the decorator caches the instance."""
    stt_engine = SpeechToText()
    return stt_engine
14
+
15
@st.cache_resource
def load_conversation_handler():
    """Create the conversation handler; the decorator caches the instance."""
    chat_engine = ConversationHandler()
    return chat_engine
18
+
19
@st.cache_resource
def load_text_to_speech():
    """Create the text-to-speech wrapper; the decorator caches the instance."""
    tts_engine = TextToSpeech()
    return tts_engine
22
+
23
+ # Load models once; each loader is wrapped in st.cache_resource, so script reruns reuse the same instances
24
+ speech_to_text = load_speech_to_text()
25
+ conversation_handler = load_conversation_handler()
26
+ text_to_speech = load_text_to_speech()
27
+
28
+ # Capture microphone input; main() reads this module-level value
29
+ audio_data = mic_recorder()
30
+
31
def main():
    """Run one voice-to-voice turn and render every stage in the Streamlit page.

    Reads the module-level ``audio_data`` returned by ``mic_recorder()``,
    transcribes it, asks the conversation handler for a reply, and plays the
    synthesized reply. A stage that yields nothing reports its own message
    and ends the turn.
    """
    # Nothing has been recorded yet, or the recorder payload carries no audio.
    if not (audio_data and 'bytes' in audio_data):
        st.warning("Please record some audio.")
        return

    audio_bytes = audio_data['bytes']

    # Play recorded audio
    st.audio(audio_bytes, format="audio/wav")
    st.write("Transcribing...")

    # Transcribe the audio
    transcription = speech_to_text.record_and_transcribe(audio_bytes)
    if not transcription:
        st.error("No transcription available.")
        return
    st.success("Transcription:")
    st.write(transcription)

    st.write("Generating response...")
    response = conversation_handler.give_response(transcription)
    if not response:
        st.error("No response available.")
        return
    st.success("Response:")
    st.write(response.content)

    # Convert response text to speech. synthesize() raises ValueError for
    # empty or over-long text; treat that like any other synthesis failure
    # instead of letting it crash the page.
    try:
        audio_buffer = text_to_speech.synthesize(response.content)
    except ValueError:
        audio_buffer = None
    if not audio_buffer:
        st.error("No audio available.")
        return

    st.success("Generated audio:")
    # synthesize() does not request an output format, and the ElevenLabs
    # default is MP3, so advertise the matching MIME type to the browser.
    st.audio(audio_buffer, format="audio/mpeg")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ elevenlabs
2
+ groq
3
+ langchain
4
+ langchain-core
5
+ langchain-groq
6
+ python-dotenv
7
+ Requests
8
+ streamlit
9
+ langchain-community
10
+ streamlit-mic-recorder
src/for_streamlit/__init__.py ADDED
File without changes
src/for_streamlit/spt.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wave
2
+ import io
3
+ from groq import Groq
4
+
5
class SpeechToText:
    """Transcribe recorded audio with the Groq transcription endpoint."""

    def __init__(self):
        """Create the Groq client used for all transcription requests."""
        # NOTE(review): Groq() is built without arguments, so credentials are
        # presumably read from the environment (GROQ_API_KEY) -- confirm.
        self.client = Groq()

    def record_and_transcribe(self, audio_bytes):
        """Transcribe a recorded clip.

        Args:
            audio_bytes: Raw bytes of the recording; uploaded under the name
                ``audio.wav``.

        Returns:
            The transcribed text, or ``None`` when there is no audio or the
            request fails. Callers test the result for truthiness, so an
            error must never be returned as if it were a transcription.
        """
        if not audio_bytes:
            return None

        # The context manager closes the in-memory buffer on every path.
        with io.BytesIO(audio_bytes) as wav_buffer:
            try:
                transcription = self.client.audio.transcriptions.create(
                    file=("audio.wav", wav_buffer),
                    model="whisper-large-v3-turbo",
                )
            except Exception as e:
                print(f"Error transcribing: {e}")
                return None
            return transcription.text
24
+
25
+
26
+ if __name__ == "__main__":
27
+ print("This script is designed to be used as a module, not run directly.")
src/for_streamlit/texttospeech.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from elevenlabs import ElevenLabs,Voice,VoiceSettings
3
+ from typing import Optional
4
+ from elevenlabs import play
5
+ from dotenv import load_dotenv
6
+
7
+ import io
8
# Load settings from a local .env file (if present) into the environment.
# Secrets such as ELEVENLABS_API_KEY must never be echoed to stdout or logs.
load_dotenv()
10
class TextToSpeech:
    """Convert text to audio bytes through the ElevenLabs client."""

    # Environment variables that must be set before an instance can be built.
    REQUIRED_ENV_VARS = ["ELEVENLABS_API_KEY", "ELEVENLABS_VOICE_ID"]

    def __init__(self):
        """Validate the environment; the API client is created lazily."""
        self._validate_env_vars()
        self._client: Optional[ElevenLabs] = None

    def _validate_env_vars(self) -> None:
        """Raise ValueError naming every required variable that is unset or empty."""
        missing_vars = [var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)]
        if missing_vars:
            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

    @property
    def client(self) -> "ElevenLabs":
        """Return the ElevenLabs client, creating it on first access."""
        if self._client is None:
            self._client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
        return self._client

    def synthesize(self, text: str) -> Optional[bytes]:
        """Convert text to speech.

        Args:
            text: The text to speak; must be non-blank and at most 5000
                characters long.

        Returns:
            The generated audio as bytes, or ``None`` when the request fails
            (the error is printed).

        Raises:
            ValueError: If ``text`` is missing, blank, or longer than 5000
                characters.
        """
        if not text or not text.strip():
            raise ValueError("Input text cannot be empty")
        if len(text) > 5000:
            raise ValueError("Input text cannot exceed 5000 characters")
        try:
            request = {
                "text": text,
                "voice": Voice(
                    voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
                    settings=VoiceSettings(stability=0.5, similarity_boost=0.5),
                ),
            }
            # TTS_MODEL_NAME is optional: only override the SDK's default
            # model when it is configured, rather than passing model=None.
            model_name = os.getenv("TTS_MODEL_NAME")
            if model_name:
                request["model"] = model_name
            audio_generator = self.client.generate(**request)
            # generate() yields the audio in chunks; join them into one blob.
            return b"".join(audio_generator)
        except Exception as e:
            print(f"Error synthesizing text: {str(e)}")
            return None
50
+
51
+
52
+ # if __name__=="__main__":
53
+ # ts=TextToSpeech()
54
+ # import asyncio
55
+ # async def main():
56
+ # audio_buffer = await ts.synthesize("Yeah, another example is decision trees. You're from Nepal, right, Ilam?")
57
+ # play(audio_buffer)
58
+ # asyncio.run(main())
src/for_streamlit/texttotext.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
2
+ from langchain_core.messages import HumanMessage, AIMessage
3
+ from langchain_groq import ChatGroq
4
+ from typing import List
5
+ import os
6
+ from services.prompts import ASSISTANT_PROMPT
7
+ from langchain.memory import ConversationSummaryMemory
8
+ from dotenv import load_dotenv
9
# Load settings from a local .env file (if present) into the environment.
load_dotenv()
# Fail fast with a readable message when the key is missing. Re-assigning
# os.getenv(...) into os.environ is a no-op when the key exists and raises an
# opaque TypeError when it does not.
if not os.getenv("GROQ_API_KEY"):
    raise ValueError("Missing required environment variable: GROQ_API_KEY")
11
+
12
class ConversationHandler:
    """Chat wrapper that pairs a Groq chat model with summary-based memory."""

    def __init__(self, model_name="llama-3.3-70b-versatile", temperature=0.7):
        """Build the chat model, the system prompt, and the conversation memory.

        Args:
            model_name: Model identifier passed to ChatGroq.
            temperature: Sampling temperature passed to ChatGroq.
        """
        llm = ChatGroq(model_name=model_name, temperature=temperature)
        self.chat_model = llm

        # NOTE(review): the prompt holds only the system message, so whether
        # "user_query" and "chat_history" reach the model depends on
        # placeholders inside ASSISTANT_PROMPT -- confirm against that module.
        system_message = ("system", ASSISTANT_PROMPT)
        self.prompt = ChatPromptTemplate.from_messages([system_message])

        # The same model instance is used to summarize the conversation.
        self.memory = ConversationSummaryMemory(
            llm=llm,
            max_token_limit=2000,
            return_messages=True,
            memory_key="chat_history",
        )

    def give_response(self, user_input):
        """Answer one user turn and record the exchange in memory.

        Args:
            user_input: The user's message for this turn.

        Returns:
            The model's response object; callers read its ``content``.
        """
        pipeline = self.prompt | self.chat_model
        history = self.memory.load_memory_variables({})["chat_history"]
        payload = {"user_query": user_input, "chat_history": history}
        reply = pipeline.invoke(payload)
        print(reply.content)
        self.memory.save_context({"input": user_input}, {"output": reply.content})
        return reply

    def summarize_conversation(self) -> str:
        """Return a summary predicted from the stored history, starting from an empty summary."""
        history = self.memory.load_memory_variables({})["chat_history"]
        return self.memory.predict_new_summary(messages=history, existing_summary="")

    def clear_memory(self):
        """Clear the conversation memory."""
        self.memory.clear()