Spaces:

riu-rd
/

contact-center-operations

Running

App Files Community

riu-rd committed on Jun 29

Commit

af74f3c

verified ·

1 Parent(s): 351a5c2

Upload 17 files

Browse files

Files changed (8) hide show

api.py +2 -2
requirements.txt +2 -1
services/__pycache__/audio_gemini.cpython-311.pyc +0 -0
services/__pycache__/audio_whisper.cpython-311.pyc +0 -0
services/__pycache__/text_processor.cpython-311.pyc +0 -0
services/audio_gemini.py +14 -3
services/audio_whisper.py +21 -5
services/text_processor.py +27 -27

api.py CHANGED Viewed

@@ -38,7 +38,7 @@ app.add_middleware(
 async def docs():
     return RedirectResponse(url="/docs")
-@app.post("/audio/whisper", response_model=Dict[str, str])
 async def audio_whisper(audio: UploadFile = File(...)):
     """
     Transcribes and translates an audio file using OpenAI's Whisper model.
@@ -64,7 +64,7 @@ async def audio_whisper(audio: UploadFile = File(...)):
         # Catch exceptions from the audio processing service or file reading
         raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
-@app.post("/audio/gemini", response_model=Dict[str, str])
 async def audio_gemini(audio: UploadFile = File(...)):
     """
     Receives an audio file, transcribes it, and translates the transcription

 async def docs():
     return RedirectResponse(url="/docs")
+@app.post("/audio/whisper", response_model=Dict[str, Any])
 async def audio_whisper(audio: UploadFile = File(...)):
     """
     Transcribes and translates an audio file using OpenAI's Whisper model.
         # Catch exceptions from the audio processing service or file reading
         raise HTTPException(status_code=500, detail=f"Audio processing failed: {str(e)}")
+@app.post("/audio/gemini", response_model=Dict[str, Any])
 async def audio_gemini(audio: UploadFile = File(...)):
     """
     Receives an audio file, transcribes it, and translates the transcription

requirements.txt CHANGED Viewed

@@ -26,4 +26,5 @@ soundfile
 openai-whisper
 pydantic
 langchain-google-genai
-langchain

 openai-whisper
 pydantic
 langchain-google-genai
+langchain
+tqdm

services/__pycache__/audio_gemini.cpython-311.pyc CHANGED Viewed

Binary files a/services/__pycache__/audio_gemini.cpython-311.pyc and b/services/__pycache__/audio_gemini.cpython-311.pyc differ

services/__pycache__/audio_whisper.cpython-311.pyc CHANGED Viewed

Binary files a/services/__pycache__/audio_whisper.cpython-311.pyc and b/services/__pycache__/audio_whisper.cpython-311.pyc differ

services/__pycache__/text_processor.cpython-311.pyc CHANGED Viewed

Binary files a/services/__pycache__/text_processor.cpython-311.pyc and b/services/__pycache__/text_processor.cpython-311.pyc differ

services/audio_gemini.py CHANGED Viewed

@@ -4,6 +4,13 @@ from typing import Dict
 import google.genai as genai
 from dotenv import load_dotenv
 from google.genai.types import Part
 # Load environment variables from a .env file in the root directory
 load_dotenv()
@@ -59,7 +66,7 @@ def _translate_to_english(text: str) -> str:
     return resp.text.strip() # type: ignore
-def process_audio_with_gemini(audio_bytes: bytes) -> Dict[str, str]:
     """
     Processes an audio file by first transcribing it and then translating the
     resulting text to English using the Gemini model.
@@ -84,8 +91,12 @@ def process_audio_with_gemini(audio_bytes: bytes) -> Dict[str, str]:
         translation = ""
         if transcription:
             translation = _translate_to_english(transcription)
-        return {"transcription": transcription, "translation": translation}
     except Exception as e:
         # Re-raise the exception with more context to be caught by the API endpoint
         raise Exception(f"Error processing audio with Gemini: {str(e)}")

 import google.genai as genai
 from dotenv import load_dotenv
 from google.genai.types import Part
+from pydantic import BaseModel
+from services.text_processor import process_text_to_insight
+# Add the TextRequest model definition here or import it
+class TextRequest(BaseModel):
+    text: str
 # Load environment variables from a .env file in the root directory
 load_dotenv()
     return resp.text.strip() # type: ignore
+def process_audio_with_gemini(audio_bytes: bytes) -> Dict[str, any]:
     """
     Processes an audio file by first transcribing it and then translating the
     resulting text to English using the Gemini model.
         translation = ""
         if transcription:
             translation = _translate_to_english(transcription)
+        # Step 3: Generate insights using TextRequest object
+        text_request = TextRequest(text=transcription)
+        audio_text_insights = process_text_to_insight(text_request)
+        return {"transcription": transcription, "translation": translation, "insights": audio_text_insights}
     except Exception as e:
         # Re-raise the exception with more context to be caught by the API endpoint
         raise Exception(f"Error processing audio with Gemini: {str(e)}")

services/audio_whisper.py CHANGED Viewed

@@ -3,6 +3,12 @@ import torch
 import tempfile
 import os
 from typing import Dict
 # Determine the most efficient device available (CUDA if possible, otherwise CPU)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -17,7 +23,7 @@ except Exception as e:
     print(f"Fatal: Error loading Whisper model: {e}")
     model = None
-def process_audio_with_whisper(audio_bytes: bytes) -> Dict[str, str]:
     """
     Transcribes and translates a given audio file's bytes using the Whisper model.
@@ -30,7 +36,7 @@ def process_audio_with_whisper(audio_bytes: bytes) -> Dict[str, str]:
     Returns:
         A dictionary containing the Tagalog transcription and English translation.
-        Example: {"transcription": "...", "translation": "..."}
     Raises:
         ValueError: If the Whisper model was not loaded successfully.
@@ -67,10 +73,18 @@ def process_audio_with_whisper(audio_bytes: bytes) -> Dict[str, str]:
             task="translate"
         )
         return {
-            "transcription": transcription_result.get('text', '').strip(), # type: ignore
-            "translation": translation_result.get('text', '').strip() # type: ignore
         }
     except Exception as e:
         # Log and re-raise any exceptions to be handled by the FastAPI endpoint
         print(f"An error occurred during Whisper processing: {e}")
@@ -78,4 +92,6 @@ def process_audio_with_whisper(audio_bytes: bytes) -> Dict[str, str]:
     finally:
         # Ensure the temporary file is deleted after processing
         if 'temp_path' in locals() and os.path.exists(temp_path):
-            os.remove(temp_path)

 import tempfile
 import os
 from typing import Dict
+from services.text_processor import process_text_to_insight
+from pydantic import BaseModel
+# Add the TextRequest model definition here or import it
+class TextRequest(BaseModel):
+    text: str
 # Determine the most efficient device available (CUDA if possible, otherwise CPU)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"Fatal: Error loading Whisper model: {e}")
     model = None
+def process_audio_with_whisper(audio_bytes: bytes):
     """
     Transcribes and translates a given audio file's bytes using the Whisper model.
     Returns:
         A dictionary containing the Tagalog transcription and English translation.
+        Example: {"transcription": "...", "translation": "...", "insights": "..."}
     Raises:
         ValueError: If the Whisper model was not loaded successfully.
             task="translate"
         )
+        # Get the transcribed text
+        transcribed_text = transcription_result.get('text', '').strip()
+        insights = process_text_to_insight(transcribed_text)
         return {
+            "transcription": transcribed_text,
+            "translation": translation_result.get('text', '').strip(),
+            "insights": insights
         }
     except Exception as e:
         # Log and re-raise any exceptions to be handled by the FastAPI endpoint
         print(f"An error occurred during Whisper processing: {e}")
     finally:
         # Ensure the temporary file is deleted after processing
         if 'temp_path' in locals() and os.path.exists(temp_path):
+            os.remove(temp_path)
+            print("=== Debug Whisper Output ===")

services/text_processor.py CHANGED Viewed

@@ -10,11 +10,11 @@ from langchain.output_parsers import OutputFixingParser
 from langchain.prompts import PromptTemplate, FewShotPromptTemplate
 from datetime import datetime
 import time
 import os
 from dotenv import load_dotenv
-import tqdm
 # Load environment variables from a .env file in the root directory
 load_dotenv()
@@ -140,39 +140,39 @@ sentiment_examples = [{'input': 'There is an unauthorized charge on my BPI Famil
 # --- Pydantic Models
 class GeneralInfo(BaseModel):
   case_id: Optional[str] = Field(None, description="An unique identifier given to each case message")
-  raw_message: str = Field(None, description="The raw and unstructured form of the original message or conversation") # type: ignore
-  message_source: Literal['Email', 'Phone', 'Branch', 'Facebook'] = Field(None, description="The channel to which the text was received from") # type: ignore
   customer_tier: Optional[Literal['High', 'Mid', 'Low']] = Field(None, description="The tier of the customer sending the message")
   status: Optional[Literal['New', 'Assigned', 'Closed']] = Field(None, description="The status of the message, whether it was new, already assigned, or closed")
   start_date: Optional[datetime] = Field(None, description="The date and time when the message was initiated or received.")
   close_date: Optional[datetime] = Field(None, description="The date and time when the message was marked as closed or resolved.")
 class TextOverview(BaseModel):
-  summary: str = Field(None, description="A one liner summary of the text provided. Indicates the main purpose and intention of the text. Use proper case.") # type: ignore
-  tags: List[str] = Field(None, description="A list of keywords that can be used to tag and classify the message meaningfuly. Use lowercase") # type: ignore
 class TransactionType(BaseModel):
-  interaction_type: Literal['Request', 'Inquiry', 'Complaint'] = Field(None, description="The interaction type of the message, indicates whether the customer is inquiring, complaining, or requesting to the bank") # type: ignore
-  product_type: Literal['Credit Cards', 'Deposits', 'Loans'] = Field(None, description="The product that is best connected to the purpose of the message. Indicates if the message is related to Credit Cards, Deposits, or Loans") # type: ignore
 class SentimentConfidence(BaseModel):
-  sentiment_tag: str = Field(None, description="The sentiment tag being assessed. Can be either 'Positivee', 'Negative', or 'Neutral") # type: ignore
   sentiment_confidence_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="how confident the given sentiment category is when associated with the intent of the message. Use two decimal points for the score")
   emotional_indicators: Optional[List[str]] = Field(None, description="Bigrams or trigrams that best display the particular sentiment of the message. Use lowercase. Use 'Blank' if there is no good keyword.")
 class Sentiment(BaseModel):
-  sentiment_category: Literal['Negative', 'Neutral', 'Positive'] = Field(None, description="the sentiment demonstrated within the message. Indicates whether the message has negative, positive, or neutral connotations") # type: ignore
   sentiment_reasoning: Optional[str] = Field(None, description="A one liner that depicts main reason why the text was categorized as a certain sentiment. No need to add any emphases on keywords. Use proper case.")
   sentiment_distribution: List[SentimentConfidence] = Field(description="A distribution that shows how likely each sentiment (Positive, Neutral, and Negative). Note that the sum of the confidence scores should be equal to 1.0 since it's a probability distribution")
 class Urgency(BaseModel):
-  priority_category: Literal['High', 'Medium', 'Low'] = Field(None, description = "Describes how urgent a message needs to be addressed.") # type: ignore
   priority_reason: Optional[str] = Field(None, description = "An explanation of why the priority level of a message is the way it is.")
 class ChatLogEntry(BaseModel):
-  turn_id: int = Field(None, description="A number that indicates the order in which the message is found in the conversation") # type: ignore
-  speaker: Literal['Customer', 'Bank Agent', 'Chatbot']  = Field(None, description="The entity who sent the message during the specified turn") # type: ignore
-  text: str = Field(None, description="The message sent within the turn of the speaker") # type: ignore
 class DialogueHistory(BaseModel):
   dialogue_history: List[ChatLogEntry] = Field(
@@ -232,7 +232,7 @@ ctt_fewshot_prompt = FewShotPromptTemplate(
 ctt_chain_fs = ctt_fewshot_prompt | llm_text_insights
 ctt_chain_wrapped = RunnableLambda(lambda x: {
-    "text_to_classify": x["text"] # type: ignore
 }) | ctt_chain_fs
@@ -267,8 +267,8 @@ cpl_fewshot_prompt = FewShotPromptTemplate(
 cpl_chain_fs = cpl_fewshot_prompt | llm_text_insights
 cpl_chain_wrapped = RunnableLambda(lambda x: {
-    "text_to_classify": x["text"] # type: ignore
-}) | cpl_chain_fs | RunnableLambda(lambda x: urgency_parser.parse(x.content).model_dump_json(indent=2)) # type: ignore
 ct_prompt = PromptTemplate.from_template(
     """You are an expert contact center operations agent and analyst at a banking firm. Your task is to review customer messages and classify each message by selecting exactly one label from the following services/products offered by the bank: "labels": ['Credit Cards', 'Loans', 'Deposits'].
@@ -290,7 +290,7 @@ ct_fewshot_prompt = FewShotPromptTemplate(
 ct_chain_fs = ct_fewshot_prompt | llm_text_insights
 ct_chain_wrapped = RunnableLambda(lambda x: {
-    "text_to_classify": x["text"] # type: ignore
 }) | ct_chain_fs
 sentiment_prompt = PromptTemplate.from_template(
@@ -322,7 +322,7 @@ sentiment_fewshot_prompt = FewShotPromptTemplate(
 sentiment_chain_fs = sentiment_fewshot_prompt | llm_text_insights
-sentiment_chain_wrapped = RunnableLambda(lambda x: {"text_to_classify": x["text"]}) | sentiment_chain_fs | RunnableLambda(lambda x: sentiment_parser.parse(x.content).model_dump_json(indent=2)) # type: ignore
 summary_prompt = PromptTemplate.from_template(
     """You are an expert contact center operations agent and analyst at a banking firm.
@@ -336,7 +336,7 @@ summary_prompt = PromptTemplate.from_template(
 summary_chain = summary_prompt | llm_text_insights
 summary_chain_wrapped = RunnableLambda(lambda x: {
-    "text_to_summarize": x["text"] # type: ignore
 }) | summary_chain
 kw_prompt = PromptTemplate.from_template(
@@ -353,7 +353,7 @@ kw_prompt = PromptTemplate.from_template(
 kw_chain = kw_prompt | llm_text_insights
 kw_chain_wrapped = RunnableLambda(lambda x: {
-    "text_to_extract": x["text"] # type: ignore
 }) | kw_chain
@@ -392,35 +392,35 @@ dialogue_history_prompt = PromptTemplate(
 dialogue_history_chain = dialogue_history_prompt | llm_text_insights
 dialogue_history_chain_wrapped = RunnableLambda(lambda x: {
-    "sample_text": x["text"] # type: ignore
-}) | dialogue_history_chain | RunnableLambda(lambda x: dialogue_history_parser.parse(x.content).model_dump_json(indent=2)) # type: ignore
 def process_text_to_insight(text, sleep_time_req = 5):
   try:
     result = {}
-    result['case_transaction_type'] = ctt_chain_wrapped.invoke({'text': text}).content.strip() # type: ignore
     time.sleep(sleep_time_req)
     result['case_priority_level'] = cpl_chain_wrapped.invoke({'text': text})
     time.sleep(sleep_time_req)
-    result['case_type'] = ct_chain_wrapped.invoke({'text': text}).content.strip() # type: ignore
     time.sleep(sleep_time_req)
     result['sentiment'] = sentiment_chain_wrapped.invoke({'text': text})
     time.sleep(sleep_time_req)
-    result['summary'] = summary_chain_wrapped.invoke({'text': text}).content.strip() # type: ignore
     time.sleep(sleep_time_req)
-    result['keywords'] = kw_chain_wrapped.invoke({'text': text}).content.strip() # type: ignore
     result['dialogue_history'] = dialogue_history_chain_wrapped.invoke({'text': text})
   except Exception as e:
-    tqdm.write(f"[error] Skipping row due to: {e}") # type: ignore
     result = {
         "case_text": text,
         "case_transaction_type": None,

 from langchain.prompts import PromptTemplate, FewShotPromptTemplate
 from datetime import datetime
 import time
+from tqdm import tqdm
 import os
 from dotenv import load_dotenv
 # Load environment variables from a .env file in the root directory
 load_dotenv()
 # --- Pydantic Models
 class GeneralInfo(BaseModel):
   case_id: Optional[str] = Field(None, description="An unique identifier given to each case message")
+  raw_message: str = Field(None, description="The raw and unstructured form of the original message or conversation")
+  message_source: Literal['Email', 'Phone', 'Branch', 'Facebook'] = Field(None, description="The channel to which the text was received from")
   customer_tier: Optional[Literal['High', 'Mid', 'Low']] = Field(None, description="The tier of the customer sending the message")
   status: Optional[Literal['New', 'Assigned', 'Closed']] = Field(None, description="The status of the message, whether it was new, already assigned, or closed")
   start_date: Optional[datetime] = Field(None, description="The date and time when the message was initiated or received.")
   close_date: Optional[datetime] = Field(None, description="The date and time when the message was marked as closed or resolved.")
 class TextOverview(BaseModel):
+  summary: str = Field(None, description="A one liner summary of the text provided. Indicates the main purpose and intention of the text. Use proper case.")
+  tags: List[str] = Field(None, description="A list of keywords that can be used to tag and classify the message meaningfuly. Use lowercase")
 class TransactionType(BaseModel):
+  interaction_type: Literal['Request', 'Inquiry', 'Complaint'] = Field(None, description="The interaction type of the message, indicates whether the customer is inquiring, complaining, or requesting to the bank")
+  product_type: Literal['Credit Cards', 'Deposits', 'Loans'] = Field(None, description="The product that is best connected to the purpose of the message. Indicates if the message is related to Credit Cards, Deposits, or Loans")
 class SentimentConfidence(BaseModel):
+  sentiment_tag: str = Field(None, description="The sentiment tag being assessed. Can be either 'Positivee', 'Negative', or 'Neutral")
   sentiment_confidence_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="how confident the given sentiment category is when associated with the intent of the message. Use two decimal points for the score")
   emotional_indicators: Optional[List[str]] = Field(None, description="Bigrams or trigrams that best display the particular sentiment of the message. Use lowercase. Use 'Blank' if there is no good keyword.")
 class Sentiment(BaseModel):
+  sentiment_category: Literal['Negative', 'Neutral', 'Positive'] = Field(None, description="the sentiment demonstrated within the message. Indicates whether the message has negative, positive, or neutral connotations")
   sentiment_reasoning: Optional[str] = Field(None, description="A one liner that depicts main reason why the text was categorized as a certain sentiment. No need to add any emphases on keywords. Use proper case.")
   sentiment_distribution: List[SentimentConfidence] = Field(description="A distribution that shows how likely each sentiment (Positive, Neutral, and Negative). Note that the sum of the confidence scores should be equal to 1.0 since it's a probability distribution")
 class Urgency(BaseModel):
+  priority_category: Literal['High', 'Medium', 'Low'] = Field(None, description = "Describes how urgent a message needs to be addressed.")
   priority_reason: Optional[str] = Field(None, description = "An explanation of why the priority level of a message is the way it is.")
 class ChatLogEntry(BaseModel):
+  turn_id: int = Field(None, description="A number that indicates the order in which the message is found in the conversation")
+  speaker: Literal['Customer', 'Bank Agent', 'Chatbot']  = Field(None, description="The entity who sent the message during the specified turn")
+  text: str = Field(None, description="The message sent within the turn of the speaker")
 class DialogueHistory(BaseModel):
   dialogue_history: List[ChatLogEntry] = Field(
 ctt_chain_fs = ctt_fewshot_prompt | llm_text_insights
 ctt_chain_wrapped = RunnableLambda(lambda x: {
+    "text_to_classify": x["text"]
 }) | ctt_chain_fs
 cpl_chain_fs = cpl_fewshot_prompt | llm_text_insights
 cpl_chain_wrapped = RunnableLambda(lambda x: {
+    "text_to_classify": x["text"]
+}) | cpl_chain_fs | RunnableLambda(lambda x: urgency_parser.parse(x.content).model_dump_json(indent=2))
 ct_prompt = PromptTemplate.from_template(
     """You are an expert contact center operations agent and analyst at a banking firm. Your task is to review customer messages and classify each message by selecting exactly one label from the following services/products offered by the bank: "labels": ['Credit Cards', 'Loans', 'Deposits'].
 ct_chain_fs = ct_fewshot_prompt | llm_text_insights
 ct_chain_wrapped = RunnableLambda(lambda x: {
+    "text_to_classify": x["text"]
 }) | ct_chain_fs
 sentiment_prompt = PromptTemplate.from_template(
 sentiment_chain_fs = sentiment_fewshot_prompt | llm_text_insights
+sentiment_chain_wrapped = RunnableLambda(lambda x: {"text_to_classify": x["text"]}) | sentiment_chain_fs | RunnableLambda(lambda x: sentiment_parser.parse(x.content).model_dump_json(indent=2))
 summary_prompt = PromptTemplate.from_template(
     """You are an expert contact center operations agent and analyst at a banking firm.
 summary_chain = summary_prompt | llm_text_insights
 summary_chain_wrapped = RunnableLambda(lambda x: {
+    "text_to_summarize": x["text"]
 }) | summary_chain
 kw_prompt = PromptTemplate.from_template(
 kw_chain = kw_prompt | llm_text_insights
 kw_chain_wrapped = RunnableLambda(lambda x: {
+    "text_to_extract": x["text"]
 }) | kw_chain
 dialogue_history_chain = dialogue_history_prompt | llm_text_insights
 dialogue_history_chain_wrapped = RunnableLambda(lambda x: {
+    "sample_text": x["text"]
+}) | dialogue_history_chain | RunnableLambda(lambda x: dialogue_history_parser.parse(x.content).model_dump_json(indent=2))
 def process_text_to_insight(text, sleep_time_req = 5):
   try:
     result = {}
+    result['case_transaction_type'] = ctt_chain_wrapped.invoke({'text': text}).content.strip()
     time.sleep(sleep_time_req)
     result['case_priority_level'] = cpl_chain_wrapped.invoke({'text': text})
     time.sleep(sleep_time_req)
+    result['case_type'] = ct_chain_wrapped.invoke({'text': text}).content.strip()
     time.sleep(sleep_time_req)
     result['sentiment'] = sentiment_chain_wrapped.invoke({'text': text})
     time.sleep(sleep_time_req)
+    result['summary'] = summary_chain_wrapped.invoke({'text': text}).content.strip()
     time.sleep(sleep_time_req)
+    result['keywords'] = kw_chain_wrapped.invoke({'text': text}).content.strip()
     result['dialogue_history'] = dialogue_history_chain_wrapped.invoke({'text': text})
   except Exception as e:
+    tqdm.write(f"[error] Skipping row due to: {e}")
     result = {
         "case_text": text,
         "case_transaction_type": None,