Upload 17 files
Browse files- Dockerfile +39 -39
- README.md +106 -106
- api.py +9 -10
- requirements.txt +4 -1
- services/__pycache__/audio_diarization.cpython-311.pyc +0 -0
- services/__pycache__/text_processor.cpython-311.pyc +0 -0
- services/audio_diarization.py +1 -31
- services/text_processor.py +434 -1
Dockerfile
CHANGED
@@ -1,40 +1,40 @@
|
|
1 |
-
FROM python:3.11.8
|
2 |
-
|
3 |
-
WORKDIR /
|
4 |
-
|
5 |
-
# Install system dependencies including FFmpeg
|
6 |
-
RUN apt-get update && apt-get install -y \
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
# Copy requirements.txt to the container
|
17 |
-
COPY requirements.txt ./
|
18 |
-
|
19 |
-
# Install Python dependencies
|
20 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
21 |
-
|
22 |
-
# Add a non-root user to run the application
|
23 |
-
RUN useradd -m -u 1000 user
|
24 |
-
|
25 |
-
# Set the user and home directory environment variables
|
26 |
-
USER user
|
27 |
-
ENV HOME=/home/user \
|
28 |
-
PATH=/home/user/.local/bin:$PATH
|
29 |
-
|
30 |
-
# Create the application directory
|
31 |
-
WORKDIR $HOME/app
|
32 |
-
|
33 |
-
# Copy the application code and model files
|
34 |
-
COPY --chown=user . $HOME/app/
|
35 |
-
|
36 |
-
# Expose the port the FastAPI app runs on
|
37 |
-
EXPOSE 7860
|
38 |
-
|
39 |
-
# Command to run the FastAPI app
|
40 |
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
1 |
+
FROM python:3.11.8
|
2 |
+
|
3 |
+
WORKDIR /
|
4 |
+
|
5 |
+
# Install system dependencies including FFmpeg
|
6 |
+
RUN apt-get update && apt-get install -y \
|
7 |
+
ffmpeg \
|
8 |
+
libsm6 \
|
9 |
+
libxext6 \
|
10 |
+
libxrender-dev \
|
11 |
+
libgomp1 \
|
12 |
+
wget \
|
13 |
+
git \
|
14 |
+
&& rm -rf /var/lib/apt/lists/*
|
15 |
+
|
16 |
+
# Copy requirements.txt to the container
|
17 |
+
COPY requirements.txt ./
|
18 |
+
|
19 |
+
# Install Python dependencies
|
20 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
21 |
+
|
22 |
+
# Add a non-root user to run the application
|
23 |
+
RUN useradd -m -u 1000 user
|
24 |
+
|
25 |
+
# Set the user and home directory environment variables
|
26 |
+
USER user
|
27 |
+
ENV HOME=/home/user \
|
28 |
+
PATH=/home/user/.local/bin:$PATH
|
29 |
+
|
30 |
+
# Create the application directory
|
31 |
+
WORKDIR $HOME/app
|
32 |
+
|
33 |
+
# Copy the application code and model files
|
34 |
+
COPY --chown=user . $HOME/app/
|
35 |
+
|
36 |
+
# Expose the port the FastAPI app runs on
|
37 |
+
EXPOSE 7860
|
38 |
+
|
39 |
+
# Command to run the FastAPI app
|
40 |
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
@@ -1,107 +1,107 @@
|
|
1 |
-
---
|
2 |
-
title: Contact Center Operations
|
3 |
-
emoji: ⚡
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: green
|
6 |
-
sdk: docker
|
7 |
-
pinned: false
|
8 |
-
license: mit
|
9 |
-
short_description: Contact Center Operation Insights
|
10 |
-
---
|
11 |
-
|
12 |
-
# Contact Center Operations
|
13 |
-
|
14 |
-
A FastAPI application for contact center operation insights and analytics.
|
15 |
-
|
16 |
-
## System Requirements
|
17 |
-
|
18 |
-
This application requires FFmpeg for audio processing capabilities.
|
19 |
-
|
20 |
-
## Local Development Setup
|
21 |
-
|
22 |
-
Follow these steps to run the FastAPI application locally on your machine.
|
23 |
-
|
24 |
-
### Prerequisites
|
25 |
-
|
26 |
-
- Python 3.11
|
27 |
-
- Git
|
28 |
-
- FFmpeg (for audio processing)
|
29 |
-
|
30 |
-
### Installing FFmpeg
|
31 |
-
|
32 |
-
**On Ubuntu/Debian:**
|
33 |
-
```bash
|
34 |
-
sudo apt-get update
|
35 |
-
sudo apt-get install ffmpeg
|
36 |
-
```
|
37 |
-
|
38 |
-
**On macOS:**
|
39 |
-
```bash
|
40 |
-
brew install ffmpeg
|
41 |
-
```
|
42 |
-
|
43 |
-
**On Windows:**
|
44 |
-
Download from [FFmpeg official site](https://ffmpeg.org/download.html) and add to PATH.
|
45 |
-
|
46 |
-
### Installation
|
47 |
-
|
48 |
-
1. **Clone the repository**
|
49 |
-
```bash
|
50 |
-
git clone
|
51 |
-
```
|
52 |
-
|
53 |
-
2. **Create a virtual environment**
|
54 |
-
|
55 |
-
**Option A: Using venv**
|
56 |
-
```bash
|
57 |
-
python3.11 -m venv venv
|
58 |
-
source venv/bin/activate # On Windows: venv\Scripts\activate
|
59 |
-
```
|
60 |
-
|
61 |
-
**Option B: Using conda**
|
62 |
-
```bash
|
63 |
-
conda create -n contact-center python=3.11
|
64 |
-
conda activate contact-center
|
65 |
-
```
|
66 |
-
|
67 |
-
3. **Install dependencies**
|
68 |
-
```bash
|
69 |
-
pip install -r requirements.txt
|
70 |
-
```
|
71 |
-
|
72 |
-
4. **Set up environment variables**
|
73 |
-
Create a `.env` file in the backend directory:
|
74 |
-
```bash
|
75 |
-
GEMINI_API_KEY=your_api_key_here
|
76 |
-
```
|
77 |
-
|
78 |
-
5. **Run the application**
|
79 |
-
```bash
|
80 |
-
python api.py
|
81 |
-
```
|
82 |
-
|
83 |
-
The FastAPI application will start and be available at `http://localhost:8000`.
|
84 |
-
|
85 |
-
### API Documentation
|
86 |
-
|
87 |
-
Once the application is running, you can access:
|
88 |
-
- Interactive API documentation: `http://localhost:8000/docs`
|
89 |
-
- Alternative API documentation: `http://localhost:8000/redoc`
|
90 |
-
|
91 |
-
### Deactivating the Environment
|
92 |
-
|
93 |
-
When you're done working with the application:
|
94 |
-
|
95 |
-
**For venv:**
|
96 |
-
```bash
|
97 |
-
deactivate
|
98 |
-
```
|
99 |
-
|
100 |
-
**For conda:**
|
101 |
-
```bash
|
102 |
-
conda deactivate
|
103 |
-
```
|
104 |
-
|
105 |
-
---
|
106 |
-
|
107 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: Contact Center Operations
|
3 |
+
emoji: ⚡
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: green
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
license: mit
|
9 |
+
short_description: Contact Center Operation Insights
|
10 |
+
---
|
11 |
+
|
12 |
+
# Contact Center Operations
|
13 |
+
|
14 |
+
A FastAPI application for contact center operation insights and analytics.
|
15 |
+
|
16 |
+
## System Requirements
|
17 |
+
|
18 |
+
This application requires FFmpeg for audio processing capabilities.
|
19 |
+
|
20 |
+
## Local Development Setup
|
21 |
+
|
22 |
+
Follow these steps to run the FastAPI application locally on your machine.
|
23 |
+
|
24 |
+
### Prerequisites
|
25 |
+
|
26 |
+
- Python 3.11
|
27 |
+
- Git
|
28 |
+
- FFmpeg (for audio processing)
|
29 |
+
|
30 |
+
### Installing FFmpeg
|
31 |
+
|
32 |
+
**On Ubuntu/Debian:**
|
33 |
+
```bash
|
34 |
+
sudo apt-get update
|
35 |
+
sudo apt-get install ffmpeg
|
36 |
+
```
|
37 |
+
|
38 |
+
**On macOS:**
|
39 |
+
```bash
|
40 |
+
brew install ffmpeg
|
41 |
+
```
|
42 |
+
|
43 |
+
**On Windows:**
|
44 |
+
Download from [FFmpeg official site](https://ffmpeg.org/download.html) and add to PATH.
|
45 |
+
|
46 |
+
### Installation
|
47 |
+
|
48 |
+
1. **Clone the repository**
|
49 |
+
```bash
|
50 |
+
git clone
|
51 |
+
```
|
52 |
+
|
53 |
+
2. **Create a virtual environment**
|
54 |
+
|
55 |
+
**Option A: Using venv**
|
56 |
+
```bash
|
57 |
+
python3.11 -m venv venv
|
58 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
59 |
+
```
|
60 |
+
|
61 |
+
**Option B: Using conda**
|
62 |
+
```bash
|
63 |
+
conda create -n contact-center python=3.11
|
64 |
+
conda activate contact-center
|
65 |
+
```
|
66 |
+
|
67 |
+
3. **Install dependencies**
|
68 |
+
```bash
|
69 |
+
pip install -r requirements.txt
|
70 |
+
```
|
71 |
+
|
72 |
+
4. **Set up environment variables**
|
73 |
+
Create a `.env` file in the backend directory:
|
74 |
+
```bash
|
75 |
+
GEMINI_API_KEY=your_api_key_here
|
76 |
+
```
|
77 |
+
|
78 |
+
5. **Run the application**
|
79 |
+
```bash
|
80 |
+
python api.py
|
81 |
+
```
|
82 |
+
|
83 |
+
The FastAPI application will start and be available at `http://localhost:8000`.
|
84 |
+
|
85 |
+
### API Documentation
|
86 |
+
|
87 |
+
Once the application is running, you can access:
|
88 |
+
- Interactive API documentation: `http://localhost:8000/docs`
|
89 |
+
- Alternative API documentation: `http://localhost:8000/redoc`
|
90 |
+
|
91 |
+
### Deactivating the Environment
|
92 |
+
|
93 |
+
When you're done working with the application:
|
94 |
+
|
95 |
+
**For venv:**
|
96 |
+
```bash
|
97 |
+
deactivate
|
98 |
+
```
|
99 |
+
|
100 |
+
**For conda:**
|
101 |
+
```bash
|
102 |
+
conda deactivate
|
103 |
+
```
|
104 |
+
|
105 |
+
---
|
106 |
+
|
107 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
api.py
CHANGED
@@ -9,6 +9,7 @@ from services.audio_whisper import process_audio_with_whisper
|
|
9 |
from services.audio_gemini import process_audio_with_gemini
|
10 |
from services.audio_diarization import process_audio_diarization, AudioDiarizationError
|
11 |
from services.image_ocr_processor import process_pdf_to_image, process_document_image
|
|
|
12 |
|
13 |
class TextRequest(BaseModel):
|
14 |
text: str
|
@@ -198,11 +199,12 @@ async def process_document(document: UploadFile = File(...)):
|
|
198 |
|
199 |
except Exception as e:
|
200 |
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
204 |
"""
|
205 |
-
|
206 |
"""
|
207 |
try:
|
208 |
# Basic validation
|
@@ -212,13 +214,10 @@ async def text_insights(request: TextRequest) -> HelloWorldResponse:
|
|
212 |
detail="Text cannot be empty or contain only whitespace."
|
213 |
)
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
received_text=request.text,
|
218 |
-
status="success"
|
219 |
-
)
|
220 |
|
221 |
-
return
|
222 |
|
223 |
except HTTPException:
|
224 |
raise
|
|
|
9 |
from services.audio_gemini import process_audio_with_gemini
|
10 |
from services.audio_diarization import process_audio_diarization, AudioDiarizationError
|
11 |
from services.image_ocr_processor import process_pdf_to_image, process_document_image
|
12 |
+
from services.text_processor import process_text_to_insight
|
13 |
|
14 |
class TextRequest(BaseModel):
|
15 |
text: str
|
|
|
199 |
|
200 |
except Exception as e:
|
201 |
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
|
202 |
+
|
203 |
+
|
204 |
+
@app.post("/text", response_model= Dict[str, str])
|
205 |
+
async def text_insights(request: TextRequest):
|
206 |
"""
|
207 |
+
Input text and extract insights
|
208 |
"""
|
209 |
try:
|
210 |
# Basic validation
|
|
|
214 |
detail="Text cannot be empty or contain only whitespace."
|
215 |
)
|
216 |
|
217 |
+
# Process text
|
218 |
+
result = process_text_to_insight(request)
|
|
|
|
|
|
|
219 |
|
220 |
+
return result
|
221 |
|
222 |
except HTTPException:
|
223 |
raise
|
requirements.txt
CHANGED
@@ -7,6 +7,7 @@ Pillow
|
|
7 |
torch
|
8 |
transformers
|
9 |
nltk
|
|
|
10 |
python-dotenv
|
11 |
PyPDF2
|
12 |
pdf2image
|
@@ -23,4 +24,6 @@ google-cloud-aiplatform
|
|
23 |
librosa
|
24 |
soundfile
|
25 |
openai-whisper
|
26 |
-
pydantic
|
|
|
|
|
|
7 |
torch
|
8 |
transformers
|
9 |
nltk
|
10 |
+
tqdm
|
11 |
python-dotenv
|
12 |
PyPDF2
|
13 |
pdf2image
|
|
|
24 |
librosa
|
25 |
soundfile
|
26 |
openai-whisper
|
27 |
+
pydantic
|
28 |
+
langchain-google-genai
|
29 |
+
langchain
|
services/__pycache__/audio_diarization.cpython-311.pyc
CHANGED
Binary files a/services/__pycache__/audio_diarization.cpython-311.pyc and b/services/__pycache__/audio_diarization.cpython-311.pyc differ
|
|
services/__pycache__/text_processor.cpython-311.pyc
ADDED
Binary file (28.6 kB). View file
|
|
services/audio_diarization.py
CHANGED
@@ -290,34 +290,4 @@ def process_audio_diarization(audio_bytes: bytes, filename: str = None) -> Dict[
|
|
290 |
raise
|
291 |
except Exception as e:
|
292 |
# Catch any unexpected errors
|
293 |
-
raise AudioDiarizationError(f"Unexpected error during audio diarization: {str(e)}")
|
294 |
-
|
295 |
-
|
296 |
-
# Example usage and testing function
|
297 |
-
def test_diarization_service():
|
298 |
-
"""
|
299 |
-
Test function for the diarization service.
|
300 |
-
This is mainly for development and debugging purposes.
|
301 |
-
"""
|
302 |
-
try:
|
303 |
-
# This would require an actual audio file to test
|
304 |
-
print("Audio diarization service loaded successfully")
|
305 |
-
print("Available functions:")
|
306 |
-
print("- process_audio_diarization(audio_bytes, filename)")
|
307 |
-
print("- get_gemini_client()")
|
308 |
-
print("- get_audio_duration(audio_bytes)")
|
309 |
-
print("- detect_audio_format(audio_bytes)")
|
310 |
-
|
311 |
-
# Check if API key is available
|
312 |
-
api_key = os.getenv("GEMINI_API_KEY")
|
313 |
-
if api_key:
|
314 |
-
print("✓ GEMINI_API_KEY found in environment")
|
315 |
-
else:
|
316 |
-
print("✗ GEMINI_API_KEY not found in environment")
|
317 |
-
|
318 |
-
except Exception as e:
|
319 |
-
print(f"Service test failed: {e}")
|
320 |
-
|
321 |
-
|
322 |
-
if __name__ == "__main__":
|
323 |
-
test_diarization_service()
|
|
|
290 |
raise
|
291 |
except Exception as e:
|
292 |
# Catch any unexpected errors
|
293 |
+
raise AudioDiarizationError(f"Unexpected error during audio diarization: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
services/text_processor.py
CHANGED
@@ -1 +1,434 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
+
from typing import List, Optional, Literal, Annotated, Union
|
3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
4 |
+
from langchain_core.messages import HumanMessage
|
5 |
+
from langchain_core.prompts import ChatPromptTemplate
|
6 |
+
from langchain_core.runnables import RunnableLambda
|
7 |
+
from langchain_core.output_parsers import PydanticOutputParser
|
8 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
9 |
+
from langchain.output_parsers import OutputFixingParser
|
10 |
+
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
|
11 |
+
from datetime import datetime
|
12 |
+
import time
|
13 |
+
|
14 |
+
|
15 |
+
import os
|
16 |
+
from dotenv import load_dotenv
|
17 |
+
import tqdm
|
18 |
+
|
19 |
+
# Load environment variables from a .env file in the root directory
|
20 |
+
load_dotenv()
|
21 |
+
|
22 |
+
# --- Configuration ---
|
23 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
24 |
+
|
25 |
+
llm_text_insights = ChatGoogleGenerativeAI(model = 'gemini-2.0-flash', google_api_key = GEMINI_API_KEY)
|
26 |
+
llm_for_parsing = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key= GEMINI_API_KEY)
|
27 |
+
llm_for_fixing = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key= GEMINI_API_KEY)
|
28 |
+
|
29 |
+
# --- Client Initialization ---
|
30 |
+
if not GEMINI_API_KEY:
|
31 |
+
raise ValueError(
|
32 |
+
"GEMINI_API_KEY not found in environment variables. "
|
33 |
+
"Please create a .env file in the project root and set the key."
|
34 |
+
)
|
35 |
+
|
36 |
+
|
37 |
+
# --- File Identification ---
|
38 |
+
file_extension_list = {
|
39 |
+
'audio': ["aac", "mid", "mp3", "m4a", "ogg", "flac", "wav", "amr", "aiff"],
|
40 |
+
'image': ["dwg", "xcf", "jpg", "jpx", "png", "apng", "gif", "webp", "cr2", "tif", "bmp", "jxr", "psd", "ico", "heic", "avif"],
|
41 |
+
'text': ["csv"]
|
42 |
+
}
|
43 |
+
|
44 |
+
# get file path
|
45 |
+
def get_file_path(file):
|
46 |
+
return list(file.keys())[0]
|
47 |
+
|
48 |
+
# function to returning file type (audio, image, or text)
|
49 |
+
def identify_filetype(file):
|
50 |
+
file_extension = os.path.splitext(get_file_path(file))[1]
|
51 |
+
for key, value in file_extension_list.items():
|
52 |
+
if file_extension[1:].lower() in value:
|
53 |
+
return key
|
54 |
+
return 'Error: FileType Not Supported'
|
55 |
+
|
56 |
+
# --- Classification Examples ---
|
57 |
+
ctt_examples = [{'input': 'There is an unauthorized charge on my BPI Family Savings credit card statement. Please investigate.',
|
58 |
+
'output': 'Complaint'},
|
59 |
+
{'input': 'Pwede ko po bang malaman kung anong mga options para mag-apply ng loan online?',
|
60 |
+
'output': 'Inquiry'},
|
61 |
+
{'input': '["Hi po! I\'d like to know the requirements for BPI Auto Loan. Planning to get a brand new SUV.", "I\'m employed. Thanks!"]',
|
62 |
+
'output': 'Inquiry'},
|
63 |
+
{'input': 'Hello, I want to inquire about the process of opening a BPI Kaya Secure Savings account.',
|
64 |
+
'output': 'Inquiry'},
|
65 |
+
{'input': "['User: I have a problem. My BPI credit card has an unauthorized transaction.', 'Chatbot: I understand. Please provide the date, amount and merchant of the suspicious charge.', 'User: April 1st, 2026, ₱3000, Zalora', 'Chatbot: Thank you. We have filed a dispute report. Please wait for our investigation results within 7 business days.']",
|
66 |
+
'output': 'Complaint'},
|
67 |
+
{'input': 'What are the steps to open a joint savings account with BPI?',
|
68 |
+
'output': 'Inquiry'},
|
69 |
+
{'input': "['How to get a new BPI ATM?', 'Kasi My card will expire soon what i supposed to do?', 'Please check your best to respond thanks po.']",
|
70 |
+
'output': 'Request'},
|
71 |
+
{'input': 'Hi, BPI! I forgot my online banking password. How can I reset it?',
|
72 |
+
'output': 'Request'},
|
73 |
+
{'input': 'Hi BPI! I lost my credit card. How do I report it?',
|
74 |
+
'output': 'Request'},
|
75 |
+
{'input': 'I want to know how much my credit card balance is and when is the due date. Thanks!',
|
76 |
+
'output': 'Inquiry'}]
|
77 |
+
cpl_examples = [{'input': 'There is an unauthorized charge on my BPI Family Savings credit card statement. Please investigate.',
|
78 |
+
'output': 'High'},
|
79 |
+
{'input': 'Pwede ko po bang malaman kung anong mga options para mag-apply ng loan online?',
|
80 |
+
'output': 'Medium'},
|
81 |
+
{'input': '["Hi po! I\'d like to know the requirements for BPI Auto Loan. Planning to get a brand new SUV.", "I\'m employed. Thanks!"]',
|
82 |
+
'output': 'Medium'},
|
83 |
+
{'input': 'Hello, I want to inquire about the process of opening a BPI Kaya Secure Savings account.',
|
84 |
+
'output': 'Medium'},
|
85 |
+
{'input': "['User: I have a problem. My BPI credit card has an unauthorized transaction.', 'Chatbot: I understand. Please provide the date, amount and merchant of the suspicious charge.', 'User: April 1st, 2026, ₱3000, Zalora', 'Chatbot: Thank you. We have filed a dispute report. Please wait for our investigation results within 7 business days.']",
|
86 |
+
'output': 'High'},
|
87 |
+
{'input': 'What are the steps to open a joint savings account with BPI?',
|
88 |
+
'output': 'Low'},
|
89 |
+
{'input': "['How to get a new BPI ATM?', 'Kasi My card will expire soon what i supposed to do?', 'Please check your best to respond thanks po.']",
|
90 |
+
'output': 'High'},
|
91 |
+
{'input': 'Hi, BPI! I forgot my online banking password. How can I reset it?',
|
92 |
+
'output': 'High'},
|
93 |
+
{'input': 'Hi BPI! I lost my credit card. How do I report it?',
|
94 |
+
'output': 'High'},
|
95 |
+
{'input': 'I want to know how much my credit card balance is and when is the due date. Thanks!',
|
96 |
+
'output': 'Low'}]
|
97 |
+
|
98 |
+
ct_examples = [{'input': 'There is an unauthorized charge on my BPI Family Savings credit card statement. Please investigate.',
|
99 |
+
'output': 'Credit Cards'},
|
100 |
+
{'input': 'Pwede ko po bang malaman kung anong mga options para mag-apply ng loan online?',
|
101 |
+
'output': 'Loans'},
|
102 |
+
{'input': '["Hi po! I\'d like to know the requirements for BPI Auto Loan. Planning to get a brand new SUV.", "I\'m employed. Thanks!"]',
|
103 |
+
'output': 'Loans'},
|
104 |
+
{'input': 'Hello, I want to inquire about the process of opening a BPI Kaya Secure Savings account.',
|
105 |
+
'output': 'Deposits'},
|
106 |
+
{'input': "['User: I have a problem. My BPI credit card has an unauthorized transaction.', 'Chatbot: I understand. Please provide the date, amount and merchant of the suspicious charge.', 'User: April 1st, 2026, ₱3000, Zalora', 'Chatbot: Thank you. We have filed a dispute report. Please wait for our investigation results within 7 business days.']",
|
107 |
+
'output': 'Credit Cards'},
|
108 |
+
{'input': 'What are the steps to open a joint savings account with BPI?',
|
109 |
+
'output': 'Deposits'},
|
110 |
+
{'input': "['How to get a new BPI ATM?', 'Kasi My card will expire soon what i supposed to do?', 'Please check your best to respond thanks po.']",
|
111 |
+
'output': 'Deposits'},
|
112 |
+
{'input': 'Hi, BPI! I forgot my online banking password. How can I reset it?',
|
113 |
+
'output': 'Deposits'},
|
114 |
+
{'input': 'Hi BPI! I lost my credit card. How do I report it?',
|
115 |
+
'output': 'Credit Cards'},
|
116 |
+
{'input': 'I want to know how much my credit card balance is and when is the due date. Thanks!',
|
117 |
+
'output': 'Credit Cards'}]
|
118 |
+
|
119 |
+
sentiment_examples = [{'input': 'There is an unauthorized charge on my BPI Family Savings credit card statement. Please investigate.',
|
120 |
+
'output': 'Negative'},
|
121 |
+
{'input': 'Pwede ko po bang malaman kung anong mga options para mag-apply ng loan online?',
|
122 |
+
'output': 'Positive'},
|
123 |
+
{'input': '["Hi po! I\'d like to know the requirements for BPI Auto Loan. Planning to get a brand new SUV.", "I\'m employed. Thanks!"]',
|
124 |
+
'output': 'Positive'},
|
125 |
+
{'input': 'Hello, I want to inquire about the process of opening a BPI Kaya Secure Savings account.',
|
126 |
+
'output': 'Positive'},
|
127 |
+
{'input': "['User: I have a problem. My BPI credit card has an unauthorized transaction.', 'Chatbot: I understand. Please provide the date, amount and merchant of the suspicious charge.', 'User: April 1st, 2026, ₱3000, Zalora', 'Chatbot: Thank you. We have filed a dispute report. Please wait for our investigation results within 7 business days.']",
|
128 |
+
'output': 'Negative'},
|
129 |
+
{'input': 'What are the steps to open a joint savings account with BPI?',
|
130 |
+
'output': 'Positive'},
|
131 |
+
{'input': "['How to get a new BPI ATM?', 'Kasi My card will expire soon what i supposed to do?', 'Please check your best to respond thanks po.']",
|
132 |
+
'output': 'Neutral'},
|
133 |
+
{'input': 'Hi, BPI! I forgot my online banking password. How can I reset it?',
|
134 |
+
'output': 'Positive'},
|
135 |
+
{'input': 'Hi BPI! I lost my credit card. How do I report it?',
|
136 |
+
'output': 'Neutral'},
|
137 |
+
{'input': 'I want to know how much my credit card balance is and when is the due date. Thanks!',
|
138 |
+
'output': 'Positive'}]
|
139 |
+
|
140 |
+
# --- Pydantic Models
|
141 |
+
class GeneralInfo(BaseModel):
|
142 |
+
case_id: Optional[str] = Field(None, description="An unique identifier given to each case message")
|
143 |
+
raw_message: str = Field(None, description="The raw and unstructured form of the original message or conversation") # type: ignore
|
144 |
+
message_source: Literal['Email', 'Phone', 'Branch', 'Facebook'] = Field(None, description="The channel to which the text was received from") # type: ignore
|
145 |
+
customer_tier: Optional[Literal['High', 'Mid', 'Low']] = Field(None, description="The tier of the customer sending the message")
|
146 |
+
status: Optional[Literal['New', 'Assigned', 'Closed']] = Field(None, description="The status of the message, whether it was new, already assigned, or closed")
|
147 |
+
start_date: Optional[datetime] = Field(None, description="The date and time when the message was initiated or received.")
|
148 |
+
close_date: Optional[datetime] = Field(None, description="The date and time when the message was marked as closed or resolved.")
|
149 |
+
|
150 |
+
class TextOverview(BaseModel):
|
151 |
+
summary: str = Field(None, description="A one liner summary of the text provided. Indicates the main purpose and intention of the text. Use proper case.") # type: ignore
|
152 |
+
tags: List[str] = Field(None, description="A list of keywords that can be used to tag and classify the message meaningfuly. Use lowercase") # type: ignore
|
153 |
+
|
154 |
+
class TransactionType(BaseModel):
|
155 |
+
interaction_type: Literal['Request', 'Inquiry', 'Complaint'] = Field(None, description="The interaction type of the message, indicates whether the customer is inquiring, complaining, or requesting to the bank") # type: ignore
|
156 |
+
product_type: Literal['Credit Cards', 'Deposits', 'Loans'] = Field(None, description="The product that is best connected to the purpose of the message. Indicates if the message is related to Credit Cards, Deposits, or Loans") # type: ignore
|
157 |
+
|
158 |
+
class SentimentConfidence(BaseModel):
|
159 |
+
sentiment_tag: str = Field(None, description="The sentiment tag being assessed. Can be either 'Positivee', 'Negative', or 'Neutral") # type: ignore
|
160 |
+
sentiment_confidence_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="how confident the given sentiment category is when associated with the intent of the message. Use two decimal points for the score")
|
161 |
+
emotional_indicators: Optional[List[str]] = Field(None, description="Bigrams or trigrams that best display the particular sentiment of the message. Use lowercase. Use 'Blank' if there is no good keyword.")
|
162 |
+
|
163 |
+
class Sentiment(BaseModel):
|
164 |
+
sentiment_category: Literal['Negative', 'Neutral', 'Positive'] = Field(None, description="the sentiment demonstrated within the message. Indicates whether the message has negative, positive, or neutral connotations") # type: ignore
|
165 |
+
sentiment_reasoning: Optional[str] = Field(None, description="A one liner that depicts main reason why the text was categorized as a certain sentiment. No need to add any emphases on keywords. Use proper case.")
|
166 |
+
sentiment_distribution: List[SentimentConfidence] = Field(description="A distribution that shows how likely each sentiment (Positive, Neutral, and Negative). Note that the sum of the confidence scores should be equal to 1.0 since it's a probability distribution")
|
167 |
+
|
168 |
+
class Urgency(BaseModel):
|
169 |
+
priority_category: Literal['High', 'Medium', 'Low'] = Field(None, description = "Describes how urgent a message needs to be addressed.") # type: ignore
|
170 |
+
priority_reason: Optional[str] = Field(None, description = "An explanation of why the priority level of a message is the way it is.")
|
171 |
+
|
172 |
+
class ChatLogEntry(BaseModel):
|
173 |
+
turn_id: int = Field(None, description="A number that indicates the order in which the message is found in the conversation") # type: ignore
|
174 |
+
speaker: Literal['Customer', 'Bank Agent', 'Chatbot'] = Field(None, description="The entity who sent the message during the specified turn") # type: ignore
|
175 |
+
text: str = Field(None, description="The message sent within the turn of the speaker") # type: ignore
|
176 |
+
|
177 |
+
class DialogueHistory(BaseModel):
|
178 |
+
dialogue_history: List[ChatLogEntry] = Field(
|
179 |
+
default_factory=list, description="A record of the chat history with a breakdown of the order of the message, the person who sent the message, and the message itself."
|
180 |
+
)
|
181 |
+
|
182 |
+
class TextInsightStructured(BaseModel):
    """Top-level bundle of every structured insight extracted from one message."""

    # Caller-supplied background/context for the message.
    general_information: GeneralInfo = Field(description="Contains the background information attached to a message sent. This is given by the user. If none is provided, put 'Blank'.")
    text_overview: TextOverview = Field(description="Provide a brief overview of key information about the message")
    urgency: Urgency = Field(description="Provides information about the urgency of the message.")
    transaction_type: TransactionType = Field(description="Provides information about the type of transaction related to the message")
    sentiment: Sentiment = Field(description="Provides information about the sentiments and indicators of sentiment that can be extracted from the message")
    # NOTE(review): repeats the field declared on DialogueHistory rather than
    # embedding that model — confirm whether reuse was intended.
    dialogue_history: List[ChatLogEntry] = Field(
        default_factory=list, description="A record of the chat history with a breakdown of the order of the message, the person who sent the message, and the message itself."
    )
|
191 |
+
|
192 |
+
# Output parsers: turn raw LLM replies into the pydantic schemas defined above,
# and supply the {format_instructions} text injected into the prompts below.
sentiment_parser = PydanticOutputParser(pydantic_object=Sentiment)
transaction_type_parser = PydanticOutputParser(pydantic_object=TransactionType)
urgency_parser = PydanticOutputParser(pydantic_object=Urgency)
dialogue_history_parser = PydanticOutputParser(pydantic_object=DialogueHistory)
text_insight_parser = PydanticOutputParser(pydantic_object=TextInsightStructured)
|
197 |
+
|
198 |
+
# --- Prompt Templates ---

# Transaction-type classification: Request / Inquiry / Complaint.
ctt_prompt = PromptTemplate.from_template(
    """You are an expert contact center operations agent and analyst at a banking firm. Your task is to review customer messages and classify each message by selecting exactly one label from the following list: ['Request', 'Inquiry', 'Complaint'].
For each message, return only one word. That is, the label, in proper case.
Please ensure that the classification you will give is based on the main intent of the message.

TRANSACTION TYPE DEFINITION AND GUIDELINES
Complaint:
- if Customer states a situation that is considered detrimental or unfavorable to the bank (e.g., errors, unresponsive personnel, personal dissatisfaction, etc.)

Inquiry:
- if Customer asks for information about a bank product or service that they wish to avail
- if Customer asks for something but no immediate action must be done

Request:
- if Customer asks for something that the bank can do for them (e.g., asking for bank statements, unlocking cards, notifying lost cards, etc.)
- if Customer asks for something that will be succeeded by an action on their end (e.g., resetting password/PIN, closing accounts, etc.)

Classify the following text: {text_to_classify}. Return only the label as output. No explanations needed."""
)

# Shared formatter used to render every few-shot example for all pipelines.
example_formatter = PromptTemplate.from_template(
    "Input: {input} \n Output: {output}"
)

ctt_fewshot_prompt = FewShotPromptTemplate(
    examples=ctt_examples,
    example_prompt=example_formatter,
    suffix=ctt_prompt.template,
    input_variables=['text_to_classify'],
    example_separator="\n\n",
)

ctt_chain_fs = ctt_fewshot_prompt | llm_text_insights


def _ctt_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"text_to_classify": payload["text"]}  # type: ignore


ctt_chain_wrapped = RunnableLambda(_ctt_inputs) | ctt_chain_fs
|
237 |
+
|
238 |
+
|
239 |
+
# Priority-level classification: High / Medium / Low, parsed into the Urgency schema.
cpl_prompt = PromptTemplate.from_template(
    """You are an expert contact center operations agent and analyst at a banking firm. Your task is to review customer messages and classify each message by selecting exactly one label from the following priority levels: ['High', 'Medium', 'Low'].
For each message, return only one word. That is, the label, in proper case.
Please ensure your classification is based on the main intent of the message.
Take into account the nature of the customer message and the gravity of supposed issue on personal and historical data to classify the message.

PRIORITY DEFINITIONS AND GUIDELINES:
High: - Urgent issues, complaints, or problems requiring immediate resolution. - Fraud, unauthorized transactions, or security breaches. - Account lockouts or inability to access funds. - Time-sensitive concerns with direct financial impact or security risks.
Medium: - Customer expressing clear interest in availing a service or applying for a product (e.g., opening an account, applying for a loan, requesting a credit card). - General product/service inquiries with clear indicators of active interest. - Scheduling appointments related to new services or significant account changes.
Low: - General inquiries without a clear indicator of active interest or intent to avail a service. - Requests for descriptive information without clear business growth potential or direct impact. - Feedback, suggestions, marketing responses, or survey participation. - Requests with no urgency or direct impact on account access, security, or immediate financial transactions.

Moreover, using one sentence, indicate the reason why the message is classified its priority level.

Follow the format below:
{format_instructions}

Classify the following text: {text_to_classify}."""
)

cpl_fewshot_prompt = FewShotPromptTemplate(
    examples=cpl_examples,
    example_prompt=example_formatter,
    suffix=cpl_prompt.template,
    input_variables=['text_to_classify'],
    partial_variables={'format_instructions': urgency_parser.get_format_instructions()},
    example_separator="\n\n",
)

cpl_chain_fs = cpl_fewshot_prompt | llm_text_insights


def _cpl_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"text_to_classify": payload["text"]}  # type: ignore


def _cpl_to_json(message):
    # Validate the raw LLM reply against the Urgency schema and re-serialise it.
    return urgency_parser.parse(message.content).model_dump_json(indent=2)  # type: ignore


cpl_chain_wrapped = RunnableLambda(_cpl_inputs) | cpl_chain_fs | RunnableLambda(_cpl_to_json)
|
272 |
+
|
273 |
+
# Case-type classification against the bank's product lines.
ct_prompt = PromptTemplate.from_template(
    """You are an expert contact center operations agent and analyst at a banking firm. Your task is to review customer messages and classify each message by selecting exactly one label from the following services/products offered by the bank: "labels": ['Credit Cards', 'Loans', 'Deposits'].
For each message, return only one word. That is, the label, in proper case.
Please ensure your classification is based on the main intent of the message.
Take into account the nature of the customer message and the proximity of the message to the aforementioned services/products to classify the message.

Classify the following text: {text_to_classify}. Return only the label as output. No explanations needed."""
)

ct_fewshot_prompt = FewShotPromptTemplate(
    examples=ct_examples,
    example_prompt=example_formatter,
    suffix=ct_prompt.template,
    input_variables=['text_to_classify'],
    example_separator="\n\n",
)

ct_chain_fs = ct_fewshot_prompt | llm_text_insights


def _ct_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"text_to_classify": payload["text"]}  # type: ignore


ct_chain_wrapped = RunnableLambda(_ct_inputs) | ct_chain_fs
|
295 |
+
|
296 |
+
# Sentiment classification, parsed into the Sentiment schema (with confidence distribution).
sentiment_prompt = PromptTemplate.from_template(
    """You are an expert contact center operations agent and analyst at a banking firm. Analyze the customer messages below and classify their overall emotional sentiment based on the customer's tone and intent. Choose from: ['Positive', 'Negative', 'Neutral'].
Base your answer on whether the customer is satisfied, complaining, confused, or asking politely.
Do NOT get distracted by polite greetings like 'Hi' or 'Good day', as well as "Thanks". Do not include them in your analysis and focus on the real intent. Respond with one word per text.
If you encounter a list of messages that looks like a customer-chatbot interaction signified by "User:" and "Chatbot:" in the message, focus on the first few messages of the USER only.

SENTIMENT DEFINITIONS AND GUIDELINES:
Positive: If a customer expresses interest in applying for a product, making a transaction, even if the tone is neutral.
Neutral: If a customer expresses curiosity or questions the process but has not shown interest or if a customer want to make a request unrelated to concern, then it is also Neutral (e.g. request for statement)
Negative: If a customer expresses desire to leave or switch to another bank, classify it as Negative, even if the tone is neutral.

Make sure to provide a reasoning. Provide also a distribution of confidence score (0.0 to 1.0) for the three labels and make sure they add up to 1.0. For each sentiment, give also the keywords that can be attributed to that sentiment.

{format_instructions}

Classify the following text: {text_to_classify}."""
)

sentiment_fewshot_prompt = FewShotPromptTemplate(
    examples=sentiment_examples,
    example_prompt=example_formatter,
    suffix=sentiment_prompt.template,
    input_variables=['text_to_classify'],
    partial_variables={"format_instructions": sentiment_parser.get_format_instructions()},
    example_separator="\n\n",
)

sentiment_chain_fs = sentiment_fewshot_prompt | llm_text_insights


def _sentiment_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"text_to_classify": payload["text"]}


def _sentiment_to_json(message):
    # Validate the raw LLM reply against the Sentiment schema and re-serialise it.
    return sentiment_parser.parse(message.content).model_dump_json(indent=2)  # type: ignore


sentiment_chain_wrapped = RunnableLambda(_sentiment_inputs) | sentiment_chain_fs | RunnableLambda(_sentiment_to_json)
|
326 |
+
|
327 |
+
# One-line summarisation of the message (or of a customer-chatbot transcript).
summary_prompt = PromptTemplate.from_template(
    """You are an expert contact center operations agent and analyst at a banking firm.
Your task is to extract significant insight and information from customer messages, and summarize the key ideas from a message in one sentence.
Take note that the messages come from customers interacting with a bank.
If you encounter a list of messages that looks like a customer-chatbot interaction signified by "User:" and "Chatbot:" in the message, synthesize what they talked about. Focus on the customer and just add what the chatbot did.

Summarize the following message in no more than 15 words: {text_to_summarize}"""
)

summary_chain = summary_prompt | llm_text_insights


def _summary_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"text_to_summarize": payload["text"]}  # type: ignore


summary_chain_wrapped = RunnableLambda(_summary_inputs) | summary_chain
|
341 |
+
|
342 |
+
# Keyword extraction. Fix: the prompt used to contradict itself ("top 8" in the
# preamble vs "at most 4" in the operative instruction); both now say 4.
kw_prompt = PromptTemplate.from_template(
    """You are an expert contact center operations agent and analyst at a banking firm.
Your task is to extract significant insight and information from customer messages. To do this, extract the top 4 most important and relevant keywords.
Focus on single words (1-gram). If a short phrase (2-3 words) is exceptionally important and captures a key concept better than individual words, you may include it. Provide the keywords as a comma-separated list.
Take note that the messages come from customers interacting with a bank.
If you encounter a list of messages that looks like a customer-chatbot interaction signified by "User:" and "Chatbot:" in the message, extract the keywords in such a way that captures the essence of, or synthesizes, their conversation.

Extract at most 4 most important and relevant keywords from the following message. Output as a comma-separated list.
Message: {text_to_extract}"""
)

kw_chain = kw_prompt | llm_text_insights


def _kw_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"text_to_extract": payload["text"]}  # type: ignore


kw_chain_wrapped = RunnableLambda(_kw_inputs) | kw_chain
|
358 |
+
|
359 |
+
|
360 |
+
# Dialogue-history extraction, parsed into the DialogueHistory schema.
dialogue_history_prompt = PromptTemplate(
    template="""You are an expert dialogue analyst for banking contact center operations. Your task is to extract and structure dialogue history with high accuracy and consistency.

SPEAKER IDENTIFICATION GUIDELINES
- **Receiver**: The representative, customer service agent, or automated system. This can only take two possible answers 'Bank Agent' or 'Chatbot'
- **Customer**: The person calling or contacting the bank
- If the text contains "User" and "Chatbot" indicators, then use 'Chatbot' for the Receiver, otherwise, use 'Bank Agent'

TURN PROCESSING GUIDELINES
**Sequential Numbering**: Assign turn_id starting from 1, incrementing by 1 for each speaker change
**Speaker Consistency**: Maintain consistent speaker labels throughout the conversation
**Message Integrity**: Capture complete thoughts, even if they span multiple sentences
**Error Handling**: If unclear who is speaking, analyze context clues:
- Greetings typically come from agents
- Questions about personal info come from agents
- Requests for services come from customers

EXTRACTION CHECKS
Before finalizing dialogue_history, verify:
- No duplicate turn_ids
- No missing turns in sequence
- Speaker labels are consistent

Output the format as follows:

{format_instructions}

Extract the history of the following text: {sample_text}.
""",
    input_variables=["sample_text"],
    partial_variables={"format_instructions": dialogue_history_parser.get_format_instructions()},
)

dialogue_history_chain = dialogue_history_prompt | llm_text_insights


def _dialogue_inputs(payload):
    # Map the caller's {"text": ...} payload onto the prompt variable.
    return {"sample_text": payload["text"]}  # type: ignore


def _dialogue_to_json(message):
    # Validate the raw LLM reply against the DialogueHistory schema and re-serialise it.
    return dialogue_history_parser.parse(message.content).model_dump_json(indent=2)  # type: ignore


dialogue_history_chain_wrapped = RunnableLambda(_dialogue_inputs) | dialogue_history_chain | RunnableLambda(_dialogue_to_json)
|
397 |
+
|
398 |
+
|
399 |
+
def process_text_to_insight(text, sleep_time_req = 5):
    """Run every insight chain over *text* and collect the results into a dict.

    Fixes over the previous version:
    - the success path now includes ``case_text`` (the error path always did,
      so the return schema was inconsistent);
    - the rate-limiting sleep now runs between *every* pair of consecutive
      LLM requests (it was previously missing between the keywords and
      dialogue-history calls).

    Parameters
    ----------
    text : str
        The customer message (or chat transcript) to analyse.
    sleep_time_req : int, optional
        Seconds to pause between consecutive LLM requests (basic rate limiting).

    Returns
    -------
    dict
        Keys: ``case_text``, ``case_transaction_type``, ``case_priority_level``,
        ``case_type``, ``sentiment``, ``summary``, ``keywords``,
        ``dialogue_history``. Best-effort: on any failure all insight values
        are ``None`` and the function never raises.
    """
    # (result key, chain, strip .content?) — the priority, sentiment and
    # dialogue wrappers already return a parsed JSON string, not a message.
    steps = [
        ('case_transaction_type', ctt_chain_wrapped, True),
        ('case_priority_level', cpl_chain_wrapped, False),
        ('case_type', ct_chain_wrapped, True),
        ('sentiment', sentiment_chain_wrapped, False),
        ('summary', summary_chain_wrapped, True),
        ('keywords', kw_chain_wrapped, True),
        ('dialogue_history', dialogue_history_chain_wrapped, False),
    ]
    result = {'case_text': text}
    try:
        for i, (key, chain, needs_content) in enumerate(steps):
            if i:  # rate-limit between consecutive LLM requests
                time.sleep(sleep_time_req)
            output = chain.invoke({'text': text})
            result[key] = output.content.strip() if needs_content else output  # type: ignore
    except Exception as e:
        # Best-effort: log and fall back to an all-None row rather than raising.
        tqdm.write(f"[error] Skipping row due to: {e}")  # type: ignore
        result = {
            "case_text": text,
            "case_transaction_type": None,
            "case_priority_level": None,
            "case_type": None,
            "sentiment": None,
            "summary": None,
            "keywords": None,
            "dialogue_history": None
        }
    return result
|