Spaces:

puzan789
/

mohi

Sleeping

App Files Files Community

puzan789 committed on Feb 20

Commit

2d31940

0 Parent(s):

updated

Browse files

Files changed (8) hide show

.gitignore +53 -0
README.md +10 -0
app.py +67 -0
requirements.txt +10 -0
src/for_streamlit/__init__.py +0 -0
src/for_streamlit/spt.py +27 -0
src/for_streamlit/texttospeech.py +58 -0
src/for_streamlit/texttotext.py +52 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,53 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# Virtual environment
+venv/
+.env/
+*.env
+# Logs, local databases, and instance data
+*.log
+*.db
+instance/
+# VSCode settings
+.vscode/
+# PyCharm settings
+.idea/
+# MyPy and other Python checks
+.mypy_cache/
+.pytest_cache/
+# Jupyter Notebooks
+.ipynb_checkpoints/
+# Docker
+*.container
+.dockerignore
+docker-compose.override.yml
+# Cache and temp files
+*.swp
+*.swo
+# Ignore compiled Cython files
+*.c
+*.so
+# Ignore test coverage reports
+.coverage
+htmlcov/
+# Ignore build directories
+build/
+dist/
+*.egg-info/
+# Ignore database files
+*.sqlite3
+*.db

README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+---
+title: Osama
+emoji: 🕵️
+colorFrom: purple
+colorTo: yellow
+sdk: docker
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import streamlit as st
+from src.for_streamlit.spt import SpeechToText
+from src.for_streamlit.texttotext import ConversationHandler
+from src.for_streamlit.texttospeech import TextToSpeech
+from streamlit_mic_recorder import mic_recorder
+st.title("🎙️ Voice to Voice ")
+st.write("Click the button below to start recording.")
+# Cache the models to prevent reloading
+@st.cache_resource
+def load_speech_to_text() -> SpeechToText:
+    """Build the speech-to-text helper; the result is cached by st.cache_resource."""
+    return SpeechToText()
+@st.cache_resource
+def load_conversation_handler() -> ConversationHandler:
+    """Build the conversation handler; the result is cached by st.cache_resource."""
+    return ConversationHandler()
+@st.cache_resource
+def load_text_to_speech() -> TextToSpeech:
+    """Build the text-to-speech helper; the result is cached by st.cache_resource."""
+    return TextToSpeech()
+# Load models once
+speech_to_text = load_speech_to_text()
+conversation_handler = load_conversation_handler()
+text_to_speech = load_text_to_speech()
+# Capture microphone input
+audio_data = mic_recorder()
+def main():
+    if audio_data and 'bytes' in audio_data:
+        audio_bytes = audio_data['bytes']
+        # Play recorded audio
+        st.audio(audio_bytes, format="audio/wav")
+        st.write("Transcribing...")
+        # Transcribe the audio
+        transcription = speech_to_text.record_and_transcribe(audio_bytes)
+        if transcription:
+            st.success("Transcription:")
+            st.write(transcription)
+            st.write("Generating response...")
+            response = conversation_handler.give_response(transcription)
+            if response:
+                st.success("Response:")
+                st.write(response.content)
+                # Convert response text to speech
+                audio_buffer = text_to_speech.synthesize(response.content)
+                if audio_buffer:
+                    st.success("Generated audio:")
+                    st.audio(audio_buffer, format="audio/wav")
+                else:
+                    st.error("No audio available.")
+            else:
+                st.error("No response available.")
+        else:
+            st.error("No transcription available.")
+    else:
+        st.warning("Please record some audio.")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+elevenlabs
+groq
+langchain
+langchain-core
+langchain-groq
+python-dotenv
+Requests
+streamlit
+langchain-community
+streamlit-mic-recorder

src/for_streamlit/__init__.py ADDED Viewed

File without changes

src/for_streamlit/spt.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import wave
+import io
+from groq import Groq
+class SpeechToText:
+    def __init__(self):
+        self.client = Groq()
+    def record_and_transcribe(self, audio_bytes):
+        wav_buffer = io.BytesIO(audio_bytes)
+        try:
+            transcription = self.client.audio.transcriptions.create(
+                file=("audio.wav", wav_buffer),
+                model="whisper-large-v3-turbo"
+            )
+            return transcription.text
+        except Exception as e:
+            print(f"Error transcribing: {e}")
+            return str(e)
+        finally:
+            wav_buffer.close()
+# Running this file directly only prints a notice; it performs no transcription.
+if __name__ == "__main__":
+    print("This script is designed to be used as a module, not run directly.")

src/for_streamlit/texttospeech.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import os
+from elevenlabs import ElevenLabs,Voice,VoiceSettings
+from typing import Optional
+from elevenlabs import play
+from dotenv import load_dotenv
+import io
+load_dotenv()
+print(os.getenv("ELEVENLABS_API_KEY"))
+class TextToSpeech:
+    REQUIRED_ENV_VARS=["ELEVENLABS_API_KEY","ELEVENLABS_VOICE_ID"]
+    def __init__(self):
+        """Initialize"""
+        self._validate_env_vars()
+        self._client: Optional[ElevenLabs] = None
+    def _validate_env_vars(self) -> None:
+        """validate that all the environment variables are set"""
+        missing_vars=[var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)]
+        if missing_vars:
+            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
+    @property
+    def client(self) -> Optional[ElevenLabs]:
+        """Get or create a client instance"""
+        if self._client is None:
+            self._client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+        return self._client
+    def synthesize(self,text:str)->bytes:
+        """Convert text to speech"""
+        if not text.strip():
+            raise ValueError("Input text cannot be empty")
+        if len(text)>5000:
+            raise ValueError("Input text cannot exceed 5000 characters")
+        try:
+            audio_generator =self.client.generate(
+                text=text,
+                voice=Voice(
+                    voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+                    settings=VoiceSettings(stability=0.5, similarity_boost=0.5),
+                ),
+                model=os.getenv("TTS_MODEL_NAME"),
+            )
+            audio_bytes = b"".join(audio_generator)
+            return audio_bytes
+        except Exception as e:
+            print(f"Error synthesizing text: {str(e)}")
+            return None
+# Example usage (synthesize() is synchronous and returns the audio bytes):
+# if __name__=="__main__":
+#     ts=TextToSpeech()
+#     audio_bytes = ts.synthesize("Yeah, another example is decision trees. You're from Nepal, right, Ilam?")
+#     play(audio_bytes)

src/for_streamlit/texttotext.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
+from langchain_core.messages import HumanMessage, AIMessage
+from langchain_groq import ChatGroq
+from typing import List
+import os
+from services.prompts import ASSISTANT_PROMPT
+from langchain.memory import ConversationSummaryMemory
+from dotenv import load_dotenv
+load_dotenv()
+os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")
+class ConversationHandler:
+    def __init__(self, model_name="llama-3.3-70b-versatile", temperature=0.7):
+        self.chat_model = ChatGroq(
+            model_name=model_name,
+            temperature=temperature
+        )
+        self.prompt = ChatPromptTemplate.from_messages([
+            ("system", ASSISTANT_PROMPT)])
+        self.memory=ConversationSummaryMemory(
+            llm=self.chat_model,
+            max_token_limit=2000,
+            return_messages=True,
+            memory_key="chat_history"
+        )
+    def give_response(self,user_input):
+        chain= self.prompt|self.chat_model
+        memory_variables = self.memory.load_memory_variables({})
+        response=chain.invoke(
+            {
+                "user_query": user_input,
+                "chat_history": memory_variables["chat_history"]
+            }
+        )
+        print(response.content)
+        self.memory.save_context(
+            {"input": user_input},
+            {"output": response.content}
+        )
+        return response
+    def summarize_conversation(self) -> str:
+        memory_variables =  self.memory.load_memory_variables({})
+        return self.memory.predict_new_summary(
+            messages=memory_variables["chat_history"],
+            existing_summary=""
+        )
+    def clear_memory(self):
+         self.memory.clear()