Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- .gitattributes +3 -35
- .gitignore +1 -0
- Dockerfile +28 -0
- README.md +406 -0
- app.py +643 -0
- chatbot_model.py +130 -0
- llm_utils.py +83 -0
- profile_preprocessing.py +127 -0
- requirements.txt +31 -0
- scraping_profile.py +42 -0
.gitattributes
CHANGED
@@ -1,35 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
*.
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
# Auto detect text files and perform LF normalization
|
2 |
+
* text=auto
|
3 |
+
# *.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
Dockerfile
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
# Use an official Python runtime as a parent image
|
4 |
+
FROM python:3.11-slim
|
5 |
+
|
6 |
+
|
7 |
+
# Set the working directory in the container
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
ENV HF_HOME=/data/hf_cache
|
11 |
+
ENV TRANSFORMERS_CACHE=/data/hf_cache/transformers
|
12 |
+
ENV HF_DATASETS_CACHE=/data/hf_cache/datasets
|
13 |
+
ENV HF_HUB_CACHE=/data/hf_cache/hub
|
14 |
+
|
15 |
+
RUN mkdir -p /data/hf_cache/transformers /data/hf_cache/datasets /data/hf_cache/hub && chmod -R 777 /data/hf_cache
|
16 |
+
|
17 |
+
# Copy requirements.txt and install dependencies
|
18 |
+
COPY requirements.txt .
|
19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
20 |
+
|
21 |
+
# Copy the rest of your app's code
|
22 |
+
COPY . .
|
23 |
+
|
24 |
+
# Expose the port Streamlit runs on
|
25 |
+
EXPOSE 8501
|
26 |
+
|
27 |
+
# Run Streamlit
|
28 |
+
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
ADDED
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Linkedin Assistant
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: red
|
6 |
+
sdk: docker
|
7 |
+
app_port: 8501
|
8 |
+
tags:
|
9 |
+
- streamlit
|
10 |
+
pinned: false
|
11 |
+
short_description: Streamlit template space
|
12 |
+
license: mit
|
13 |
+
---
|
14 |
+
|
15 |
+
|
16 |
+
# 🤖 LinkedIn AI Career Assistant
|
17 |
+
|
18 |
+
[](https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/)
|
19 |
+
[](https://www.python.org/downloads/)
|
20 |
+
[](https://streamlit.io/)
|
21 |
+
[](https://langchain-ai.github.io/langgraph/)
|
22 |
+
[](https://groq.com/)
|
23 |
+
|
24 |
+
An intelligent AI-powered career assistant that analyzes LinkedIn profiles, provides job fit analysis, and offers personalized career guidance through an interactive chat interface powered by Groq's llama3-8b-8192 model.
|
25 |
+
|
26 |
+
## 🚀 **Live Demo**
|
27 |
+
|
28 |
+
Try the application live at: **https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/**
|
29 |
+
|
30 |
+
## 📋 **Table of Contents**
|
31 |
+
|
32 |
+
- [Overview](#overview)
|
33 |
+
- [Key Features](#key-features)
|
34 |
+
- [Architecture](#architecture)
|
35 |
+
- [Installation](#installation)
|
36 |
+
- [Usage](#usage)
|
37 |
+
- [Technical Implementation](#technical-implementation)
|
38 |
+
- [API Keys Setup](#api-keys-setup)
|
39 |
+
- [Session Management](#session-management)
|
40 |
+
- [Contributing](#contributing)
|
41 |
+
- [License](#license)
|
42 |
+
|
43 |
+
## 🎯 **Overview**
|
44 |
+
|
45 |
+
The LinkedIn AI Career Assistant is a sophisticated career optimization tool that combines Groq's powerful llama3-8b-8192 model with LangGraph's multi-agent framework to provide comprehensive LinkedIn profile analysis. Built using **Streamlit**, **LangGraph**, and **Groq API**, this application offers an interactive chat-based experience for professional career development.
|
46 |
+
|
47 |
+
### **What Makes This Special?**
|
48 |
+
|
49 |
+
- **🧠 Multi-Agent AI System**: Utilizes LangGraph to orchestrate specialized AI tools for different analysis tasks
|
50 |
+
- **💾 Thread-Based Sessions**: Maintains conversation context with intelligent thread management based on LinkedIn URLs
|
51 |
+
- **🎯 Job Fit Analysis**: Provides detailed match scores and improvement suggestions for target roles
|
52 |
+
- **📊 Profile Analysis**: Comprehensive strengths and weaknesses assessment
|
53 |
+
- **🔄 Real-time Scraping**: Fetches live LinkedIn profile data using Apify integration
|
54 |
+
- **⚡ Groq-Powered**: Lightning-fast responses using Groq's optimized llama3-8b-8192 model
|
55 |
+
|
56 |
+
## 🌟 **Key Features**
|
57 |
+
|
58 |
+
### 1. **Interactive Chat Interface**
|
59 |
+
- **LinkedIn URL Input**: Simply paste your LinkedIn profile URL to get started
|
60 |
+
- **Conversational AI**: Natural language interaction for profile optimization
|
61 |
+
- **Real-time Analysis**: Instant feedback and suggestions as you chat
|
62 |
+
- **Custom Styling**: Modern chat bubble interface with professional design
|
63 |
+
|
64 |
+
### 2. **Comprehensive Profile Analysis**
|
65 |
+
- **Strengths Identification**: Highlights technical skills, projects, education, and soft skills
|
66 |
+
- **Weakness Detection**: Identifies gaps in technical skills, experience, and missing context
|
67 |
+
- **Actionable Suggestions**: Provides specific recommendations for profile enhancement
|
68 |
+
- **Section-by-Section Access**: Detailed extraction of individual LinkedIn profile sections
|
69 |
+
|
70 |
+
### 3. **Advanced Job Fit Analysis**
|
71 |
+
- **Match Score Calculation**: Quantifies how well your profile fits target roles (0-100%)
|
72 |
+
- **Skill Gap Analysis**: Identifies missing skills required for your target position
|
73 |
+
- **Role-Specific Feedback**: Tailored suggestions for improving job compatibility
|
74 |
+
- **Visual Score Display**: Circular progress indicators for match percentages
|
75 |
+
|
76 |
+
### 4. **Intelligent Session Management**
|
77 |
+
- **URL-Based Threading**: Automatically finds existing conversations for the same LinkedIn profile
|
78 |
+
- **Session Continuity**: Choose to continue previous chats or start fresh
|
79 |
+
- **SQLite Persistence**: Robust conversation storage with automatic checkpointing
|
80 |
+
- **Thread Isolation**: Secure separation of different user sessions
|
81 |
+
|
82 |
+
### 5. **Professional Data Handling**
|
83 |
+
- **Pydantic Validation**: Robust data validation using structured schemas
|
84 |
+
- **State Management**: Comprehensive state tracking across conversation flows
|
85 |
+
- **Error Handling**: Graceful handling of API failures and data parsing issues
|
86 |
+
- **Memory Optimization**: Efficient storage and retrieval of conversation context
|
87 |
+
|
88 |
+
## 🏗️ **Architecture**
|
89 |
+
|
90 |
+
### **Multi-Agent System Design**
|
91 |
+
|
92 |
+
```
|
93 |
+
┌─────────────────────────────────────────────────────────────┐
|
94 |
+
│ User Interface (Streamlit) │
|
95 |
+
│ Custom Chat Interface │
|
96 |
+
└─────────────────────┬───────────────────────────────────────┘
|
97 |
+
│
|
98 |
+
┌─────────────────────┴───────────────────────────────────────┐
|
99 |
+
│ LangGraph Orchestrator │
|
100 |
+
│ (ChatbotState Schema) │
|
101 |
+
│ ┌─────────────────┬─────────────────┬─────────────────┐ │
|
102 |
+
│ │ Chatbot Node │ Profile Tool │ Job Match Tool │ │
|
103 |
+
│ │ (Router) │ (Analyzer) │ (Matcher) │ │
|
104 |
+
│ │ │ │ │ │
|
105 |
+
│ │ Extract Tool │ │ │ │
|
106 |
+
│ │ (Section Data) │ │ │ │
|
107 |
+
│ └─────────────────┴─────────────────┴─────────────────┘ │
|
108 |
+
└─────────────────────┬───────────────────────────────────────┘
|
109 |
+
│
|
110 |
+
┌─────────────────────┴───────────────────────────────────────┐
|
111 |
+
│ External Services │
|
112 |
+
│ ┌─────────────────┬─────────────────┬─────────────────┐ │
|
113 |
+
│ │ Apify LinkedIn │ Groq API │ SQLite │ │
|
114 |
+
│ │ Scraper │ (llama3-8b-8192)│ Checkpointer │ │
|
115 |
+
│ └─────────────────┴─────────────────┴─────────────────┘ │
|
116 |
+
└─────────────────────────────────────────────────────────────┘
|
117 |
+
```
|
118 |
+
|
119 |
+
### **Core Components**
|
120 |
+
|
121 |
+
1. **ChatBot Node**: Main conversation router with tool calling capabilities
|
122 |
+
2. **Profile Analyzer**: Comprehensive profile evaluation for strengths and weaknesses
|
123 |
+
3. **Job Matcher**: Role compatibility analysis with scoring and suggestions
|
124 |
+
4. **Extract Tool**: Granular access to specific profile sections
|
125 |
+
5. **State Management**: Pydantic-based ChatbotState with comprehensive field tracking
|
126 |
+
6. **Thread System**: URL-based session identification and management
|
127 |
+
|
128 |
+
## 🛠️ **Installation**
|
129 |
+
|
130 |
+
### **Prerequisites**
|
131 |
+
|
132 |
+
- Python 3.8 or higher
|
133 |
+
- pip package manager
|
134 |
+
- Groq API key
|
135 |
+
- Apify API token
|
136 |
+
|
137 |
+
### **Quick Start**
|
138 |
+
|
139 |
+
1. **Clone the Repository**
|
140 |
+
```bash
|
141 |
+
git clone https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant.git
|
142 |
+
cd Linkedin-Profile-AI-Assistant
|
143 |
+
```
|
144 |
+
|
145 |
+
2. **Install Dependencies**
|
146 |
+
```bash
|
147 |
+
pip install -r requirements.txt
|
148 |
+
```
|
149 |
+
|
150 |
+
3. **Set Up Environment Variables**
|
151 |
+
```bash
|
152 |
+
cp .env.example .env
|
153 |
+
# Edit .env with your API keys
|
154 |
+
```
|
155 |
+
|
156 |
+
4. **Run the Application**
|
157 |
+
```bash
|
158 |
+
streamlit run app.py
|
159 |
+
```
|
160 |
+
|
161 |
+
5. **Access the Application**
|
162 |
+
```
|
163 |
+
Open your browser and go to: http://localhost:8501
|
164 |
+
```
|
165 |
+
|
166 |
+
### **Requirements**
|
167 |
+
|
168 |
+
```txt
|
169 |
+
streamlit>=1.28.0
|
170 |
+
langchain>=0.0.350
|
171 |
+
langchain-openai>=0.0.8
|
172 |
+
langgraph>=0.0.55
|
173 |
+
openai>=1.3.0
|
174 |
+
pydantic>=2.0.0
|
175 |
+
python-dotenv>=1.0.0
|
176 |
+
apify-client>=1.0.0
|
177 |
+
dirtyjson>=1.0.8
|
178 |
+
```
|
179 |
+
|
180 |
+
## 📖 **Usage**
|
181 |
+
|
182 |
+
### **Getting Started**
|
183 |
+
|
184 |
+
1. **Launch the Application**
|
185 |
+
- Open the application in your browser
|
186 |
+
- You'll see the main interface with a LinkedIn URL input field
|
187 |
+
|
188 |
+
2. **Enter Your LinkedIn Profile**
|
189 |
+
- Paste your LinkedIn profile URL (e.g., `https://www.linkedin.com/in/your-profile/`)
|
190 |
+
- The system will automatically scrape and analyze your profile
|
191 |
+
|
192 |
+
3. **Choose Session Mode**
|
193 |
+
- If a previous session exists, choose to continue or start fresh
|
194 |
+
- New sessions initialize with full profile preprocessing
|
195 |
+
|
196 |
+
4. **Start Chatting**
|
197 |
+
- Begin conversations with queries like:
|
198 |
+
- "Analyze my profile strengths and weaknesses"
|
199 |
+
- "I want to apply for a Data Scientist role"
|
200 |
+
- "Show me my about section"
|
201 |
+
- "What skills am I missing for a Software Engineer position?"
|
202 |
+
|
203 |
+
### **Available Commands**
|
204 |
+
|
205 |
+
- **Profile Analysis**: "Analyze my profile" - Full strengths/weaknesses analysis
|
206 |
+
- **Job Matching**: "I want to apply for [role]" - Match score and skill gaps
|
207 |
+
- **Section Access**: "Show me my [section]" - Extract specific profile sections
|
208 |
+
- **General Queries**: Ask any career-related questions for guidance
|
209 |
+
|
210 |
+
### **Sample Conversations**
|
211 |
+
|
212 |
+
```
|
213 |
+
User: "Analyze my LinkedIn profile"
|
214 |
+
AI: ✅ Profile analysis complete!
|
215 |
+
|
216 |
+
💪 Strengths
|
217 |
+
- Technical: Python, Machine Learning, Data Analysis
|
218 |
+
- Projects: E-commerce recommendation system, Stock prediction model
|
219 |
+
- Education: Computer Science degree, Data Science certification
|
220 |
+
- Soft Skills: Problem-solving, Team collaboration
|
221 |
+
|
222 |
+
⚠️ Weaknesses
|
223 |
+
- Technical Gaps: Cloud computing platforms, MLOps tools
|
224 |
+
- Project/Experience Gaps: Limited production deployment experience
|
225 |
+
- Missing Context: Quantified project impacts and metrics
|
226 |
+
|
227 |
+
🛠 Suggestions to improve
|
228 |
+
- Add AWS/Azure cloud certifications
|
229 |
+
- Include specific metrics for project outcomes
|
230 |
+
- Highlight leadership or mentoring experiences
|
231 |
+
```
|
232 |
+
|
233 |
+
```
|
234 |
+
User: "I want to apply for a Senior Data Scientist role"
|
235 |
+
AI: 📊 Job Fit Analysis
|
236 |
+
|
237 |
+
🎯 Target Role: Senior Data Scientist
|
238 |
+
Match Score: 78%
|
239 |
+
|
240 |
+
Missing Skills:
|
241 |
+
• Deep Learning frameworks (TensorFlow, PyTorch)
|
242 |
+
• MLOps and model deployment
|
243 |
+
• Leadership and team management experience
|
244 |
+
|
245 |
+
Suggestions:
|
246 |
+
• Complete online courses in deep learning
|
247 |
+
• Build projects showcasing end-to-end ML pipelines
|
248 |
+
• Seek opportunities to lead junior team members
|
249 |
+
```
|
250 |
+
|
251 |
+
## 🔧 **Technical Implementation**
|
252 |
+
|
253 |
+
### **State Management**
|
254 |
+
|
255 |
+
The application uses a sophisticated Pydantic-based state management system:
|
256 |
+
|
257 |
+
```python
|
258 |
+
class ChatbotState(BaseModel):
|
259 |
+
profile: Dict[str, Any] # Processed LinkedIn profile data
|
260 |
+
profile_url: Optional[str] # Original LinkedIn URL
|
261 |
+
sections: Dict[str, str] # Individual profile sections
|
262 |
+
enhanced_content: Dict[str, str] # Future AI-generated improvements
|
263 |
+
profile_analysis: Optional[Dict[str, Any]] # Strengths/weaknesses
|
264 |
+
job_fit: Optional[Dict[str, Any]] # Job matching results
|
265 |
+
target_role: Optional[str] # User's target job role
|
266 |
+
messages: Annotated[List[BaseMessage], add_messages] # Chat history
|
267 |
+
next_tool_name: Optional[str] # Tool routing information
|
268 |
+
```
|
269 |
+
|
270 |
+
### **Tool Integration**
|
271 |
+
|
272 |
+
The system includes three specialized tools:
|
273 |
+
|
274 |
+
1. **Profile Analyzer Tool**:
|
275 |
+
- Comprehensive profile evaluation
|
276 |
+
- Structured output with strengths, weaknesses, suggestions
|
277 |
+
- Uses ProfileAnalysisModel for validation
|
278 |
+
|
279 |
+
2. **Job Matcher Tool**:
|
280 |
+
- Role-specific compatibility analysis
|
281 |
+
- Calculates match scores (0-100%)
|
282 |
+
- Identifies missing skills and provides suggestions
|
283 |
+
|
284 |
+
3. **Extract Tool**:
|
285 |
+
- Granular access to profile sections
|
286 |
+
- Supports nested data extraction with dot notation
|
287 |
+
- Returns structured results for specific queries
|
288 |
+
|
289 |
+
### **Session Architecture**
|
290 |
+
|
291 |
+
- **Thread Management**: URL-based thread identification for session continuity
|
292 |
+
- **Checkpointing**: SQLite-based persistent storage with automatic fallback
|
293 |
+
- **State Validation**: Comprehensive Pydantic validation for data integrity
|
294 |
+
- **Memory Optimization**: Efficient message history management
|
295 |
+
|
296 |
+
### **LLM Integration**
|
297 |
+
|
298 |
+
- **Model**: Groq's llama3-8b-8192 for fast, high-quality responses
|
299 |
+
- **API**: OpenAI-compatible interface through Groq
|
300 |
+
- **Tool Calling**: Native support for structured tool invocation
|
301 |
+
- **Error Handling**: Robust retry mechanisms and graceful degradation
|
302 |
+
|
303 |
+
## 🔑 **API Keys Setup**
|
304 |
+
|
305 |
+
Create a `.env` file in the root directory:
|
306 |
+
|
307 |
+
```env
|
308 |
+
# Groq API Key (required)
|
309 |
+
GROQ_API_KEY=your_groq_api_key_here
|
310 |
+
|
311 |
+
# Apify API Token (required for LinkedIn scraping)
|
312 |
+
APIFY_API_TOKEN=your_apify_token_here
|
313 |
+
```
|
314 |
+
|
315 |
+
### **Getting API Keys**
|
316 |
+
|
317 |
+
1. **Groq API Key**:
|
318 |
+
- Visit [Groq Console](https://console.groq.com/)
|
319 |
+
- Create an account and generate an API key
|
320 |
+
- Used for llama3-8b-8192 model inference
|
321 |
+
|
322 |
+
2. **Apify API Token**:
|
323 |
+
- Go to [Apify Console](https://console.apify.com/)
|
324 |
+
- Sign up and get your API token
|
325 |
+
- Used for LinkedIn profile scraping
|
326 |
+
|
327 |
+
## 💾 **Session Management**
|
328 |
+
|
329 |
+
The application implements intelligent session management:
|
330 |
+
|
331 |
+
### **Thread-Based System**
|
332 |
+
- Each LinkedIn profile URL gets a unique thread ID
|
333 |
+
- Automatic detection of existing conversations for the same profile
|
334 |
+
- Secure isolation between different user sessions
|
335 |
+
|
336 |
+
### **Conversation Persistence**
|
337 |
+
- SQLite-based storage for production environments
|
338 |
+
- Memory-based fallback for development/testing
|
339 |
+
- Automatic checkpointing after each interaction
|
340 |
+
- Recovery capability in case of interruptions
|
341 |
+
|
342 |
+
### **User Experience**
|
343 |
+
- Choice to continue previous conversations or start fresh
|
344 |
+
- Seamless transition between sessions
|
345 |
+
- Maintained conversation context across browser refreshes
|
346 |
+
|
347 |
+
## 🤝 **Contributing**
|
348 |
+
|
349 |
+
We welcome contributions to improve the LinkedIn AI Career Assistant! Here's how you can help:
|
350 |
+
|
351 |
+
### **Development Setup**
|
352 |
+
|
353 |
+
1. Fork the repository
|
354 |
+
2. Create a feature branch: `git checkout -b feature/your-feature-name`
|
355 |
+
3. Make your changes and test thoroughly
|
356 |
+
4. Submit a pull request with a clear description
|
357 |
+
|
358 |
+
### **Areas for Contribution**
|
359 |
+
|
360 |
+
- **Tool Enhancement**: Implement the commented-out content_generator tool
|
361 |
+
- **UI/UX Improvements**: Enhance the Streamlit interface design
|
362 |
+
- **Performance Optimization**: Improve response times and resource usage
|
363 |
+
- **Testing**: Add comprehensive test coverage
|
364 |
+
- **Documentation**: Expand examples and API documentation
|
365 |
+
|
366 |
+
### **Code Style**
|
367 |
+
|
368 |
+
- Follow PEP 8 guidelines for Python code
|
369 |
+
- Use meaningful variable and function names
|
370 |
+
- Add docstrings for all functions and classes
|
371 |
+
- Include type hints where appropriate
|
372 |
+
- Validate data models with Pydantic
|
373 |
+
|
374 |
+
## 📝 **License**
|
375 |
+
|
376 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
377 |
+
|
378 |
+
## 🙏 **Acknowledgments**
|
379 |
+
|
380 |
+
- **Groq** for providing fast and efficient LLM inference
|
381 |
+
- **LangChain/LangGraph** for the multi-agent framework
|
382 |
+
- **Streamlit** for the web application framework
|
383 |
+
- **Apify** for LinkedIn scraping capabilities
|
384 |
+
- **Hugging Face** for hosting the live demo
|
385 |
+
|
386 |
+
## 📞 **Support**
|
387 |
+
|
388 |
+
For questions, issues, or suggestions:
|
389 |
+
|
390 |
+
- **Create an Issue**: [GitHub Issues](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/issues)
|
391 |
+
- **Discussions**: [GitHub Discussions](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/discussions)
|
392 |
+
- **Email**: [email protected]
|
393 |
+
|
394 |
+
## 🔄 **Recent Updates**
|
395 |
+
|
396 |
+
- **v2.0**: Migrated to Groq API for faster inference
|
397 |
+
- **Thread Management**: Implemented URL-based session tracking
|
398 |
+
- **Enhanced UI**: Custom chat interface with professional styling
|
399 |
+
- **Robust State**: Pydantic-based data validation and error handling
|
400 |
+
- **Tool Optimization**: Streamlined to three core analysis tools
|
401 |
+
|
402 |
+
---
|
403 |
+
|
404 |
+
**Built with ❤️ by Sri Vallabh**
|
405 |
+
|
406 |
+
*Empowering professionals to optimize their LinkedIn presence and advance their careers through AI-powered insights.*
|
app.py
ADDED
@@ -0,0 +1,643 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import re
|
4 |
+
import time
|
5 |
+
from typing import Dict, Any, List, Optional, Annotated
|
6 |
+
from chatbot_model import (
|
7 |
+
UserMemory,
|
8 |
+
ChatbotState,
|
9 |
+
ProfileAnalysisModel,
|
10 |
+
JobFitModel,
|
11 |
+
ContentGenerationModel,
|
12 |
+
|
13 |
+
)
|
14 |
+
from llm_utils import call_llm_and_parse
|
15 |
+
from profile_preprocessing import (
|
16 |
+
preprocess_profile,
|
17 |
+
initialize_state,
|
18 |
+
normalize_url
|
19 |
+
)
|
20 |
+
from openai import OpenAI
|
21 |
+
import streamlit as st
|
22 |
+
import hashlib
|
23 |
+
from dotenv import load_dotenv
|
24 |
+
from pydantic import BaseModel, Field,ValidationError
|
25 |
+
# import pdb; pdb.set_trace()
|
26 |
+
from scraping_profile import scrape_linkedin_profile
|
27 |
+
from langchain_openai import ChatOpenAI
|
28 |
+
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage,BaseMessage,ToolMessage
|
29 |
+
from langchain_core.tools import tool
|
30 |
+
from langgraph.graph import StateGraph, END,START
|
31 |
+
from langgraph.checkpoint.memory import MemorySaver
|
32 |
+
from langgraph.graph import add_messages # if your framework exposes this
|
33 |
+
from langgraph.prebuilt import ToolNode,tools_condition,InjectedState
|
34 |
+
import dirtyjson
|
35 |
+
import sqlite3
|
36 |
+
try:
|
37 |
+
from langgraph.checkpoint.sqlite import SqliteSaver
|
38 |
+
SQLITE_AVAILABLE = True
|
39 |
+
except ImportError:
|
40 |
+
SQLITE_AVAILABLE = False
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
# ========== 1. ENVIRONMENT & LLM SETUP ==========
# Load API credentials from a local .env file into the process environment.
load_dotenv()
groq_key = os.getenv("GROQ_API_KEY")
# NOTE(review): `assert` is stripped when Python runs with -O; consider raising
# RuntimeError for this required-configuration check instead.
assert groq_key, "GROQ_API_KEY not found in environment!"
# OpenAI-compatible client pointed at Groq's endpoint (Groq exposes an
# OpenAI-style REST API, so the stock OpenAI SDK client works unchanged).
groq_client=OpenAI(
    api_key=os.getenv("GROQ_API_KEY"),
    base_url="https://api.groq.com/openai/v1"
)
|
52 |
+
|
53 |
+
def normalize_url(url):
    """Return *url* trimmed of surrounding whitespace and all trailing slashes.

    NOTE(review): this shadows the `normalize_url` imported from
    `profile_preprocessing` earlier in the file — confirm which one is intended.
    """
    trimmed = url.strip()
    return trimmed.rstrip('/')
|
55 |
+
|
56 |
+
def validate_state(state: dict) -> None:
    """
    Validate given state dict against ChatbotState schema.
    Displays result in Streamlit instead of printing.
    """
    try:
        ChatbotState.model_validate(state)
    except ValidationError as exc:
        # Surface every individual schema violation in the UI, then halt the app run.
        st.error("❌ Validation failed!")
        formatted = [
            f"- At: {' → '.join(str(part) for part in err['loc'])}\n  Error: {err['msg']}"
            for err in exc.errors()
        ]
        st.write("\n".join(formatted))
        # Raw validation error available on demand for debugging.
        st.expander("See raw validation error").write(str(exc))
        st.stop()
|
76 |
+
|
77 |
+
|
78 |
+
# Module-level memory store for user data, shared across this Streamlit process.
user_memory = UserMemory()

# ========== 7. AGENT FUNCTIONS ==========
|
81 |
+
|
82 |
+
def profile_analysis_prompt(profile: Dict[str, str]) -> str:
    """Build the LLM prompt for full-profile strengths/weaknesses analysis.

    Args:
        profile: Preprocessed LinkedIn profile data; values are read with
            ``.get`` so missing sections render as empty strings.

    Returns:
        A stripped prompt string instructing the model to reply with JSON
        containing ``strengths``, ``weaknesses``, and ``suggestions`` keys
        (matching what the profile-analyzer tool expects to parse).
    """
    # The doubled braces ({{ }}) render as literal braces in the f-string so
    # the example JSON schema survives formatting.
    return f"""
You are a top-tier LinkedIn career coach and AI analyst.

Analyze the following candidate profile carefully.

Candidate profile data:
FullName: {profile.get("FullName", "")}
Headline: {profile.get("Headline", "")}
JobTitle: {profile.get("JobTitle", "")}
CompanyName: {profile.get("CompanyName", "")}
CompanyIndustry: {profile.get("CompanyIndustry", "")}
CurrentJobDuration: {profile.get("CurrentJobDuration", "")}
About: {profile.get("About", "")}
Experiences: {profile.get("Experiences", "")}
Skills: {profile.get("Skills", "")}
Educations: {profile.get("Educations", "")}
Certifications: {profile.get("Certifications", "")}
HonorsAndAwards: {profile.get("HonorsAndAwards", "")}
Verifications: {profile.get("Verifications", "")}
Highlights: {profile.get("Highlights", "")}
Projects: {profile.get("Projects", "")}
Publications: {profile.get("Publications", "")}
Patents: {profile.get("Patents", "")}
Courses: {profile.get("Courses", "")}
TestScores: {profile.get("TestScores", "")}


Identify and summarize:
1. strengths:
   - technical strengths (skills, tools, frameworks)
   - project strengths (impactful projects, innovation)
   - educational strengths (degrees, certifications, awards)
   - soft skills and personality traits (teamwork, leadership)
2. weaknesses:
   - missing or weak technical skills
   - gaps in projects, experience, or education
   - unclear profile sections or missing context
3. actionable suggestions:
   - concrete ways to improve profile headline, about section, or add projects
   - suggestions to learn or highlight new skills
   - ideas to make the profile more attractive for recruiters

Important instructions:
- Respond ONLY with valid JSON.
- Do NOT include text before or after JSON.
- Be concise but detailed.


Example JSON format:
{{
  "strengths": {{
    "technical": ["...", "..."],
    "projects": ["...", "..."],
    "education": ["...", "..."],
    "soft_skills": ["...", "..."]
  }},
  "weaknesses": {{
    "technical_gaps": ["...", "..."],
    "project_or_experience_gaps": ["...", "..."],
    "missing_context": ["...", "..."]
  }},
  "suggestions": [
    "...",
    "...",
    "..."
  ]
}}
""".strip()
|
152 |
+
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
def job_fit_prompt(sections: Dict[str, str], target_role: str) -> str:
    """Build the LLM prompt that scores how well the profile fits *target_role*.

    Args:
        sections: Flattened profile sections (headline, about, skills, ...).
        target_role: Job title to compare the profile against.

    Returns:
        A stripped prompt string instructing the model to reply with JSON
        matching the JobFitModel schema (match_score / missing_skills /
        suggestions).
    """
    prompt = f"""
You are an expert career coach and recruiter.

Compare the following candidate profile against the typical requirements for the role of "{target_role}".

Candidate Profile:
- Headline: {sections.get('headline', '')}
- About: {sections.get('about', '')}
- Job Title: {sections.get('job_title', '')}
- Company: {sections.get('company_name', '')}
- Industry: {sections.get('company_industry', '')}
- Current Job Duration: {sections.get('current_job_duration', '')}
- Skills: {sections.get('skills', '')}
- Projects: {sections.get('projects', '')}
- Educations: {sections.get('educations', '')}
- Certifications: {sections.get('certifications', '')}
- Honors & Awards: {sections.get('honors_and_awards', '')}
- Experiences: {sections.get('experiences', '')}

**Instructions:**
- Respond ONLY with valid JSON.
- Your JSON must exactly match the following schema:
{{
    "match_score": 85,
    "missing_skills": ["Skill1", "Skill2"],
    "suggestions": ["...", "...", "..."]
}}
- "match_score": integer from 0–100 estimating how well the profile fits the target role.
- "missing_skills": key missing or weakly mentioned skills.
- "suggestions": 3 actionable recommendations to improve fit (e.g., learn tools, rewrite headline).

Do NOT include explanations, text outside JSON, or markdown.
Start with '{{' and end with '}}'.
The JSON must be directly parseable.
"""
    return prompt.strip()
|
192 |
+
|
193 |
+
|
194 |
+
# --- Tool: Profile Analyzer ---
@tool
def profile_analyzer(state: Annotated[ChatbotState, InjectedState]) -> dict:
    """
    Tool: Analyze the overall full user's profile to give strengths, weaknesses, suggestions.
    This is needed only if full analysis of profile is needed.
    Returns the full analysis in the form of a json.

    - It takes no arguments
    """
    # Summarized profile is a dict of plain strings built at state-init time.
    profile = getattr(state, "profile", {}) or {}

    # Build prompt
    prompt = profile_analysis_prompt(profile)

    # call_llm_and_parse returns a validated ProfileAnalysisModel on success,
    # but a plain {"error": ..., "raw": ...} dict after exhausting retries —
    # calling .model_dump() on that dict would raise AttributeError.
    result = call_llm_and_parse(groq_client, prompt, ProfileAnalysisModel)
    if isinstance(result, dict):
        # Schema-shaped fallback, mirroring job_matcher's failure handling.
        analysis_dict = {
            "strengths": {"technical": [], "projects": [], "education": [], "soft_skills": []},
            "weaknesses": {"technical_gaps": [], "project_or_experience_gaps": [], "missing_context": []},
            "suggestions": ["Parsing failed or incomplete response."],
        }
    else:
        analysis_dict = result.model_dump()

    # Persist for later turns: state drives the UI, user_memory keeps history.
    state.profile_analysis = analysis_dict
    user_memory.save("profile_analysis", analysis_dict)

    print("💾 [DEBUG] Saved analysis to user memory.")
    print("📦 [DEBUG] Updated state.profile_analysis with analysis.")

    return analysis_dict
|
224 |
+
|
225 |
+
# --- Tool: Job Matcher ---


@tool
def job_matcher(
    state: Annotated[ChatbotState, InjectedState],
    target_role: str = None
) -> dict:
    """
    Tool: Analyze how well the user's profile fits the target role.
    - If user is asking if he is a good fit for a certain role, or needs to see if his profile is compatible with a certain role, call this.
    - Takes target_role as an argument.
    - this tool is needed when match score, missing skills, suggestions are needed based on a job name given.
    """
    print(f"target role is {target_role}")
    # Update state.target_role if provided (previously only commented, never done),
    # so later turns can reuse the requested role without re-asking.
    if target_role:
        state.target_role = target_role

    sections = getattr(state, "sections", {})

    # Build prompt
    prompt = job_fit_prompt(sections, target_role)

    # Call LLM and parse; a failed parse (including call_llm_and_parse returning
    # its error dict, whose .model_dump attribute is missing) lands in the fallback.
    try:
        job_fit_model = call_llm_and_parse(groq_client, prompt, JobFitModel)
        job_fit_dict = job_fit_model.model_dump()
        job_fit_dict["target_role"] = target_role
    except Exception as e:
        # Log instead of silently swallowing, then return a schema-shaped
        # result so the UI renders cleanly.
        print(f"[job_matcher] parse failed: {e}")
        job_fit_dict = {
            "target_role": target_role,
            "match_score": 0,
            "missing_skills": [],
            "suggestions": ["Parsing failed or incomplete response."]
        }

    # Save to state and user memory
    state.job_fit = job_fit_dict
    user_memory.save("job_fit", job_fit_dict)

    return job_fit_dict
|
265 |
+
|
266 |
+
|
267 |
+
|
268 |
+
|
269 |
+
|
270 |
+
|
271 |
+
@tool
def extract_from_state_tool(
    state: Annotated[ChatbotState, InjectedState],
    key: str
) -> dict:
    """
    This tool is used if the user wants to ask about any particular part of his profile. Use this if a single section is targeted. It expects key as an argument, that represents what
    the user is wanting to look at, from his profile.
    Argument:
    key: only pass one from the below list, identify one thing the user wants to look into and choose that:
    "sections.about", "sections.headline", "sections.skills", "sections.projects",
    "sections.educations", "sections.certifications", "sections.honors_and_awards",
    "sections.experiences", "sections.publications", "sections.patents",
    "sections.courses", "sections.test_scores", "sections.verifications",
    "sections.highlights", "sections.job_title", "sections.company_name",
    "sections.company_industry", "sections.current_job_duration", "sections.full_name",
    "enhanced_content", "profile_analysis", "job_fit", "target_role", "editing_section"
    """
    # Walk the dotted path, supporting both plain dicts and Pydantic models.
    value = state
    try:
        for part in key.split('.'):
            if isinstance(value, dict):
                value = value.get(part)
            elif hasattr(value, part):
                value = getattr(value, part)
            else:
                value = None
            if value is None:
                break
    except Exception:
        # Any lookup error degrades to "not found" rather than crashing the agent.
        value = None
    return {"result": value}
|
304 |
+
|
305 |
+
|
306 |
+
# Tools exposed to the agent; the LLM selects among these via function-calling.
tools = [
    profile_analyzer,
    job_matcher,
    extract_from_state_tool
]
# Groq's OpenAI-compatible endpoint, driven through the ChatOpenAI client.
# temperature=0 keeps tool-routing decisions deterministic.
llm = ChatOpenAI(
    api_key=groq_key,
    base_url="https://api.groq.com/openai/v1",
    model="llama3-8b-8192",
    temperature=0
)
# Bound copy that advertises the tool schemas to the model.
llm_with_tools = llm.bind_tools(tools)
|
318 |
+
|
319 |
+
|
320 |
+
|
321 |
+
# ========== 8. LANGGRAPH PIPELINE ==========
|
322 |
+
|
323 |
+
|
324 |
+
def chatbot_node(state: ChatbotState) -> ChatbotState:
    """LangGraph node: run the tool-enabled LLM over the recent chat history.

    Appends the model's reply (possibly carrying a tool call) to
    state.messages and returns the mutated state.
    """
    validate_state(state)

    messages = state.get("messages", [])

    system_prompt = """
You are a helpful AI assistant specialized in LinkedIn profile coaching.

You can:
- Answer user questions.
- If user is greeting , greet him back also telling how you can help him.
- You should proactively use specialized tools whenever possible to give richer, data-driven answers.
IMPORTANT RULES:
- You must call at most one tool at a time.
- Never call multiple tools together in the same step.
- If user asks to show any section, use extract_from_state_tool, and after that, show the exact result from it.
- If information about that section is already known, dont call extract_from_state_tool, directly answer the user query.
- call profile_analyzer function only when full profile analysis is needed, otherwise rely on extract_from_state_tool.
- If user asks to enhance any section, check if it is there in history, otherwise call extract_from_state_tool first.
- Prefer to call a tool when answering instead of directly replying, especially if it can add new, useful insights or up-to-date data.
- If a tool has been recently used and new info isn’t needed, you may answer directly.
- Use tools to verify assumptions, enrich answers, or when the user asks about strengths, weaknesses, job fit, or wants improvements.

Always respond helpfully, clearly, and with actionable advice to guide the user in improving their LinkedIn profile.
"""

    # Keep the prompt small: system message plus only the last two turns.
    messages = [SystemMessage(content=system_prompt)] + messages[-2:]
    response = llm_with_tools.invoke(messages)
    if hasattr(response, "tool_calls") and response.tool_calls:
        first_tool = response.tool_calls[0]
        # Tool calls may arrive as dicts or objects depending on client version.
        # (Removed dead `tool_args` extraction — it was computed but never used.)
        tool_name = first_tool.get("name") if isinstance(first_tool, dict) else getattr(first_tool, "name", None)
        print(f"[DEBBBBUUUUGGG] using tool {tool_name}")

    # DEBUG
    print("[DEBUG] LLM response:", response)
    state.setdefault("messages", []).append(response)

    return state
|
365 |
+
|
366 |
+
|
367 |
+
|
368 |
+
|
369 |
+
|
370 |
+
# --- Graph definition ---
# Standard LangGraph ReAct wiring: chatbot -> (conditional) tools -> chatbot loop.
graph = StateGraph(state_schema=ChatbotState)
graph.add_node("chatbot", chatbot_node)
graph.add_node("tools", ToolNode(tools))
graph.add_edge(START, "chatbot")
# Route to "tools" when the LLM emitted a tool call, otherwise finish the turn.
graph.add_conditional_edges("chatbot", tools_condition)
graph.add_edge("tools","chatbot")
# NOTE(review): redundant with add_edge(START, "chatbot") above — harmless duplication.
graph.set_entry_point("chatbot")
|
378 |
+
|
379 |
+
# --- Streamlit UI ---
|
380 |
+
st.set_page_config(page_title="💼 LinkedIn AI Career Assistant", page_icon="🤖", layout="wide")
|
381 |
+
st.title("🧑💼 LinkedIn AI Career Assistant")
|
382 |
+
|
383 |
+
# --- Checkpointer and graph initialization ---
|
384 |
+
if "checkpointer" not in st.session_state:
|
385 |
+
if SQLITE_AVAILABLE:
|
386 |
+
conn = sqlite3.connect("checkpoints1.db", check_same_thread=False)
|
387 |
+
st.session_state["checkpointer"] = SqliteSaver(conn)
|
388 |
+
else:
|
389 |
+
st.session_state["checkpointer"] = MemorySaver()
|
390 |
+
checkpointer = st.session_state["checkpointer"]
|
391 |
+
|
392 |
+
if "app_graph" not in st.session_state:
|
393 |
+
st.session_state["app_graph"] = graph.compile(checkpointer=checkpointer)
|
394 |
+
app_graph = st.session_state["app_graph"]
|
395 |
+
# Find or create thread
def find_thread_id_for_url(checkpointer, url, max_threads=100):
    """Scan thread ids 0..max_threads-1 for a checkpoint whose stored
    profile_url matches *url*.

    Returns:
        (thread_id, channel_values) on a match, else (None, None).
    """
    wanted = normalize_url(url)
    for tid in range(max_threads):
        cfg = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}}
        snapshot = checkpointer.get(cfg)
        if not snapshot or "channel_values" not in snapshot:
            continue
        channel_values = snapshot["channel_values"]
        if normalize_url(channel_values.get("profile_url", "")) == wanted:
            return str(tid), channel_values
    return None, None
|
407 |
+
|
408 |
+
def delete_thread_checkpoint(checkpointer, thread_id):
    """Remove all checkpoints for *thread_id* when the backend supports it.

    SqliteSaver exposes delete_thread; other checkpointers are left untouched
    (no-op, matching the original best-effort behavior).
    """
    delete = getattr(checkpointer, "delete_thread", None)
    if callable(delete):
        delete(thread_id)
|
415 |
+
|
416 |
+
|
417 |
+
def get_next_thread_id(checkpointer, max_threads=100):
    """Return the smallest unused thread id in [0, max_threads) as a string.

    A slot counts as used when the checkpointer holds any checkpoint for it.

    Raises:
        RuntimeError: when every slot already holds a checkpoint.
    """
    for tid in range(max_threads):
        cfg = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}}
        if not checkpointer.get(cfg):
            return str(tid)
    raise RuntimeError("No available thread_id")
|
427 |
+
|
428 |
+
# --- Session selection and state initialization ---
|
429 |
+
|
430 |
+
if "chat_mode" not in st.session_state:
|
431 |
+
profile_url = st.text_input("Profile URL (e.g., https://www.linkedin.com/in/username/)")
|
432 |
+
if not profile_url:
|
433 |
+
st.info("Please enter a valid LinkedIn profile URL above to start.")
|
434 |
+
st.stop()
|
435 |
+
|
436 |
+
valid_pattern = r"^https://www\.linkedin\.com/in/[^/]+/?$"
|
437 |
+
if not re.match(valid_pattern, profile_url.strip()):
|
438 |
+
st.error("❌ Invalid LinkedIn profile URL. Make sure it matches the format.")
|
439 |
+
st.stop()
|
440 |
+
url = profile_url.strip()
|
441 |
+
|
442 |
+
existing_thread_id, previous_state = find_thread_id_for_url(checkpointer, url)
|
443 |
+
# Defensive: ensure required fields
|
444 |
+
required_fields = ["profile", "sections"]
|
445 |
+
if previous_state and not all(f in previous_state and previous_state[f] for f in required_fields):
|
446 |
+
st.warning("Previous session is missing required data. Please start a new chat.")
|
447 |
+
previous_state = None
|
448 |
+
|
449 |
+
if previous_state:
|
450 |
+
st.info("A previous session found. Choose:")
|
451 |
+
col1, col2 = st.columns(2)
|
452 |
+
if col1.button("Continue previous chat"):
|
453 |
+
st.session_state["chat_mode"] = "continue"
|
454 |
+
st.session_state["thread_id"] = existing_thread_id
|
455 |
+
st.session_state.state = previous_state
|
456 |
+
st.rerun()
|
457 |
+
elif col2.button("Start new chat"):
|
458 |
+
delete_thread_checkpoint(checkpointer, existing_thread_id)
|
459 |
+
with st.spinner("Fetching and processing profile... ⏳"):
|
460 |
+
raw=scrape_linkedin_profile(url)
|
461 |
+
thread_id = existing_thread_id
|
462 |
+
st.session_state["chat_mode"] = "new"
|
463 |
+
st.session_state["thread_id"] = thread_id
|
464 |
+
st.session_state.state = initialize_state(raw)
|
465 |
+
st.session_state.state["profile_url"] = normalize_url(url)
|
466 |
+
st.session_state.state["messages"] = []
|
467 |
+
st.rerun()
|
468 |
+
st.stop()
|
469 |
+
else:
|
470 |
+
with st.spinner("Fetching and processing profile... ⏳"):
|
471 |
+
raw=scrape_linkedin_profile(url)
|
472 |
+
thread_id = get_next_thread_id(checkpointer)
|
473 |
+
st.session_state["thread_id"] = thread_id
|
474 |
+
st.session_state["chat_mode"] = "new"
|
475 |
+
st.session_state.state = initialize_state(raw)
|
476 |
+
st.session_state.state["profile_url"] = normalize_url(url)
|
477 |
+
st.session_state.state["messages"] = []
|
478 |
+
st.rerun()
|
479 |
+
|
480 |
+
# --- Main chat UI (only after chat_mode is set) ---
|
481 |
+
state = st.session_state.state
|
482 |
+
thread_id = st.session_state.get("thread_id")
|
483 |
+
|
484 |
+
st.subheader("💬 Chat with your AI Assistant")
|
485 |
+
messages = state.get("messages", [])
|
486 |
+
chat_container = st.container()
|
487 |
+
|
488 |
+
with chat_container:
|
489 |
+
st.markdown(
|
490 |
+
"""
|
491 |
+
<style>
|
492 |
+
.chat-row { display: flex; width: 100%; margin-bottom: 12px; animation: fadeIn 0.5s; }
|
493 |
+
.chat-row.user { justify-content: flex-end; }
|
494 |
+
.chat-row.ai { justify-content: flex-start; }
|
495 |
+
.chat-bubble { font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif; font-size: 1.08rem; line-height: 1.65; padding: 14px 22px; border-radius: 20px; min-width: 60px; max-width: 75vw; box-shadow: 0 2px 12px rgba(0,0,0,0.10); word-break: break-word; display: inline-block; position: relative; margin-bottom: 2px; }
|
496 |
+
.bubble-user { background: linear-gradient(90deg, #43e97b 0%, #38f9d7 100%); color: #fff; border-bottom-right-radius: 6px; border-top-right-radius: 22px; text-align: right; box-shadow: 0 4px 16px rgba(67,233,123,0.13); }
|
497 |
+
.bubble-ai { background: linear-gradient(90deg, #e3f0ff 0%, #c9eaff 100%); color: #1a237e; border-bottom-left-radius: 6px; border-top-left-radius: 22px; text-align: left; border: 1.5px solid #b3e0fc; box-shadow: 0 4px 16px rgba(44, 62, 80, 0.08); }
|
498 |
+
.bubble-unknown { background: #fffbe6; color: #8a6d3b; border-radius: 14px; text-align: center; border: 1px solid #ffe082; display: inline-block; }
|
499 |
+
.sender-label { font-size: 0.93em; font-weight: 600; opacity: 0.7; margin-bottom: 4px; display: block; }
|
500 |
+
.avatar { width: 38px; height: 38px; border-radius: 50%; margin-right: 10px; margin-top: 2px; background: #e0e0e0; object-fit: cover; box-shadow: 0 2px 6px rgba(0,0,0,0.07); }
|
501 |
+
@keyframes fadeIn { from { opacity: 0; transform: translateY(12px);} to { opacity: 1; transform: translateY(0);} }
|
502 |
+
</style>
|
503 |
+
""",
|
504 |
+
unsafe_allow_html=True,
|
505 |
+
)
|
506 |
+
|
507 |
+
job_fit = state.get("job_fit")
|
508 |
+
for msg in messages:
|
509 |
+
if isinstance(msg, HumanMessage):
|
510 |
+
st.markdown(
|
511 |
+
f"""
|
512 |
+
<div class="chat-row user">
|
513 |
+
<div class="chat-bubble bubble-user">
|
514 |
+
<span class="sender-label">🧑💻 You</span>
|
515 |
+
{msg.content}
|
516 |
+
</div>
|
517 |
+
</div>
|
518 |
+
""",
|
519 |
+
unsafe_allow_html=True,
|
520 |
+
)
|
521 |
+
elif isinstance(msg, AIMessage):
|
522 |
+
if not msg.content or not msg.content.strip():
|
523 |
+
continue
|
524 |
+
st.markdown(
|
525 |
+
f"""
|
526 |
+
<div class="chat-row ai">
|
527 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="AI"/>
|
528 |
+
<div class="chat-bubble bubble-ai">
|
529 |
+
<span class="sender-label">🤖 AI</span>
|
530 |
+
{msg.content}
|
531 |
+
</div>
|
532 |
+
</div>
|
533 |
+
""",
|
534 |
+
unsafe_allow_html=True,
|
535 |
+
)
|
536 |
+
elif isinstance(msg, ToolMessage):
|
537 |
+
raw_content = msg.content or "(no content)"
|
538 |
+
try:
|
539 |
+
parsed = json.loads(raw_content)
|
540 |
+
except Exception:
|
541 |
+
parsed = None
|
542 |
+
|
543 |
+
if parsed and isinstance(parsed, dict):
|
544 |
+
# --- Profile analysis format ---
|
545 |
+
if all(k in parsed for k in ("strengths", "weaknesses", "suggestions")):
|
546 |
+
strengths = parsed["strengths"]
|
547 |
+
weaknesses = parsed["weaknesses"]
|
548 |
+
suggestions = parsed["suggestions"]
|
549 |
+
|
550 |
+
formatted = (
|
551 |
+
"### 💪 **Strengths**\n"
|
552 |
+
f"- **Technical:** {', '.join(strengths.get('technical', []) or ['None'])}\n"
|
553 |
+
f"- **Projects:** {', '.join(strengths.get('projects', []) or ['None'])}\n"
|
554 |
+
f"- **Education:** {', '.join(strengths.get('education', []) or ['None'])}\n"
|
555 |
+
f"- **Soft Skills:** {', '.join(strengths.get('soft_skills', []) or ['None'])}\n\n"
|
556 |
+
"### ⚠️ **Weaknesses**\n"
|
557 |
+
f"- **Technical Gaps:** {', '.join(weaknesses.get('technical_gaps', []) or ['None'])}\n"
|
558 |
+
f"- **Project/Experience Gaps:** {', '.join(weaknesses.get('project_or_experience_gaps', []) or ['None'])}\n"
|
559 |
+
f"- **Missing Context:** {', '.join(weaknesses.get('missing_context', []) or ['None'])}\n\n"
|
560 |
+
"### 🛠 **Suggestions to improve**\n"
|
561 |
+
+ "\n".join(f"- {s}" for s in suggestions)
|
562 |
+
)
|
563 |
+
|
564 |
+
st.markdown(f"""
|
565 |
+
<div class="chat-row ai">
|
566 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
567 |
+
<div class="chat-bubble bubble-ai">
|
568 |
+
<span class="sender-label">📊 Profile Analysis</span>
|
569 |
+
{formatted}
|
570 |
+
</div>
|
571 |
+
</div>
|
572 |
+
""", unsafe_allow_html=True)
|
573 |
+
|
574 |
+
# --- Job fit format ---
|
575 |
+
elif "match_score" in parsed:
|
576 |
+
percent = parsed["match_score"]
|
577 |
+
suggestions = parsed.get("suggestions", [])
|
578 |
+
missing = parsed.get("missing_skills", [])
|
579 |
+
target_role = parsed.get('target_role', 'unspecified')
|
580 |
+
state["target_role"]=target_role
|
581 |
+
suggestions_html = "<br>".join(f"• {s}" for s in suggestions)
|
582 |
+
missing_html = "<br>".join(f"• {s}" for s in missing)
|
583 |
+
|
584 |
+
st.markdown(f"""
|
585 |
+
<div class="chat-row ai">
|
586 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
587 |
+
<div class="chat-bubble bubble-ai">
|
588 |
+
<span class="sender-label">📊 Job Fit</span>
|
589 |
+
<b>🎯 Target Role:</b> {target_role}<br>
|
590 |
+
<div style="
|
591 |
+
width: 120px; height: 120px; border-radius: 50%;
|
592 |
+
background: conic-gradient(#25D366 {percent * 3.6}deg, #e0e0e0 0deg);
|
593 |
+
display: flex; align-items: center; justify-content: center;
|
594 |
+
font-size: 1.8rem; color: #333; margin: 10px auto;">
|
595 |
+
{percent}%
|
596 |
+
</div>
|
597 |
+
<b>Missing Skills:</b><br>{missing_html}<br><br>
|
598 |
+
<b>Suggestions:</b><br>{suggestions_html}
|
599 |
+
</div>
|
600 |
+
</div>
|
601 |
+
""", unsafe_allow_html=True)
|
602 |
+
|
603 |
+
# --- Section text format ---
|
604 |
+
elif "result" in parsed:
|
605 |
+
text = parsed["result"]
|
606 |
+
st.markdown(f"""
|
607 |
+
<div class="chat-row ai">
|
608 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
609 |
+
<div class="chat-bubble bubble-ai">
|
610 |
+
<span class="sender-label">📄 Section Content</span>
|
611 |
+
{text}
|
612 |
+
</div>
|
613 |
+
</div>
|
614 |
+
""", unsafe_allow_html=True)
|
615 |
+
|
616 |
+
else:
|
617 |
+
st.markdown(
|
618 |
+
f"""
|
619 |
+
<div class="chat-row">
|
620 |
+
<div class="chat-bubble bubble-unknown">
|
621 |
+
<span class="sender-label">⚠️ Unknown</span>
|
622 |
+
{getattr(msg, 'content', str(msg))}
|
623 |
+
</div>
|
624 |
+
</div>
|
625 |
+
""",
|
626 |
+
unsafe_allow_html=True,
|
627 |
+
)
|
628 |
+
st.markdown('<div style="clear:both"></div>', unsafe_allow_html=True)
|
629 |
+
|
630 |
+
st.markdown("---")
|
631 |
+
|
632 |
+
user_input = st.chat_input(
|
633 |
+
placeholder="Ask about your LinkedIn profile, e.g., 'Analyze my profile, how do I fit for AI role, how is my about section?'"
|
634 |
+
)
|
635 |
+
|
636 |
+
if user_input and user_input.strip():
|
637 |
+
state.setdefault("messages", []).append(HumanMessage(content=user_input.strip()))
|
638 |
+
validate_state(state)
|
639 |
+
thread_id = st.session_state.get("thread_id")
|
640 |
+
config = {"configurable": {"thread_id": thread_id}}
|
641 |
+
with st.spinner("Processing your request..."):
|
642 |
+
st.session_state.state = app_graph.invoke(state, config)
|
643 |
+
st.rerun()
|
chatbot_model.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict, Any, Optional, Annotated
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
from langchain_core.messages import BaseMessage
|
4 |
+
from langgraph.graph import add_messages
|
5 |
+
class ChatbotState(BaseModel):
    """Conversation state threaded through the LangGraph pipeline.

    Exposes dict-like .get()/.setdefault() so graph code can treat this
    Pydantic model and a plain dict interchangeably.
    """

    def get(self, key, default=None):
        """
        Allow dict-like .get() access for compatibility.
        """
        # First try attribute directly
        if hasattr(self, key):
            return getattr(self, key)
        # Fallback: check if it's in __dict__
        return self.__dict__.get(key, default)

    def setdefault(self, key, default):
        """
        Dict-like setdefault: if attribute is None, set it to default and return it.
        Otherwise, return existing value.
        """
        if hasattr(self, key):
            value = getattr(self, key)
            if value is None:
                setattr(self, key, default)
                return default
            return value
        else:
            # attribute does not exist: set it
            # NOTE(review): on Pydantic v2, setattr of an undeclared field raises
            # unless the model config permits extra attributes — confirm config.
            setattr(self, key, default)
            return default

    # Preprocessed/summarized profile data (required at construction).
    profile: Dict[str, Any] = Field(..., description="Preprocessed / summarized profile data")
    profile_url: Optional[str] = Field(
        default=None,
        description="Original LinkedIn profile URL provided by the user."
    )

    # Quick access sections (about, headline, skills etc.)
    sections: Dict[str, str] = Field(..., description="Flattened profile sections for quick access")

    # Enhancements and analysis results
    enhanced_content: Dict[str, str] = Field(
        default_factory=dict,
        description=(
            "Map of improved or rewritten profile sections generated by the ContentGenerator tool. "
            "Keys are section names (e.g., 'about', 'headline'); values are enhanced text."
        )
    )

    profile_analysis: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Structured analysis of the user's profile produced by the ProfileAnalyzer tool, "
            "including strengths, weaknesses, and actionable suggestions."
        )
    )

    job_fit: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Assessment result from the JobMatcher tool, detailing how well the user's profile matches "
            "the target role, including missing skills and match score."
        )
    )

    target_role: Optional[str] = Field(
        None,
        description=(
            "Target job role the user is aiming for. "
            "Can be set by the user directly during the conversation or inferred by the chatbot."
        )
    )

    editing_section: Optional[str] = Field(
        None,
        description=(
            "Name of the profile section currently being edited or improved, "
            "set dynamically when the ContentGenerator tool is invoked."
        )
    )
    next_tool_name: Optional[str] = Field(
        default=None,
        description="Name of the next tool the chatbot wants to call, set dynamically after LLM response."
    )

    # Annotated chat history directly using BaseMessage; add_messages lets
    # LangGraph merge/append message updates instead of overwriting.
    messages: Annotated[List[BaseMessage], add_messages] = Field(
        default_factory=list,
        description="List of user and assistant messages"
    )
|
90 |
+
|
91 |
+
|
92 |
+
|
93 |
+
class ProfileAnalysisStrengths(BaseModel):
    """Strength buckets returned by the profile-analysis LLM call."""
    technical: List[str]
    projects: List[str]
    education: List[str]
    soft_skills: List[str]
|
98 |
+
|
99 |
+
class ProfileAnalysisWeaknesses(BaseModel):
    """Weakness buckets returned by the profile-analysis LLM call."""
    technical_gaps: List[str]
    project_or_experience_gaps: List[str]
    missing_context: List[str]
|
103 |
+
|
104 |
+
class ProfileAnalysisModel(BaseModel):
    """Top-level schema the profile_analyzer tool validates LLM JSON against."""
    strengths: ProfileAnalysisStrengths
    weaknesses: ProfileAnalysisWeaknesses
    suggestions: List[str]
|
108 |
+
|
109 |
+
class JobFitModel(BaseModel):
    """Schema the job_matcher tool validates LLM JSON against."""
    # Fit estimate constrained to a 0–100 percentage.
    match_score: int = Field(..., ge=0, le=100)
    missing_skills: List[str]
    suggestions: List[str]
|
113 |
+
|
114 |
+
class ContentGenerationModel(BaseModel):
    """Schema for rewritten-section output from the content-generation flow."""
    new_content: str
|
116 |
+
|
117 |
+
|
118 |
+
# ========== 6. MEMORY SETUP ==========

class UserMemory:
    """Simple append-only, in-process store of (key, value) events."""

    def __init__(self):
        self.profile = None
        self.target_roles = []
        self.history = []

    def save(self, key, value):
        """Record one (key, value) pair, preserving insertion order."""
        self.history.append((key, value))

    def get_history(self):
        """Return every recorded (key, value) pair, oldest first."""
        return self.history
|
llm_utils.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
from typing import Type, Union, Dict, Any
|
3 |
+
from pydantic import BaseModel
|
4 |
+
import dirtyjson
|
5 |
+
import re
|
6 |
+
# Make sure you install dirtyjson: pip install dirtyjson
|
7 |
+
|
8 |
+
# === Optionally, import your Groq client from where you configure it ===
|
9 |
+
|
10 |
+
# === Helper function ===
|
11 |
+
|
12 |
+
def call_llm_and_parse(
    groq_client,
    prompt: str,
    model: Type[BaseModel],
    max_retries: int = 3,
    delay: float = 1.0
) -> Union[BaseModel, Dict[str, Any]]:
    """
    Call LLM with a prompt, parse the JSON response, and validate it using a Pydantic model.

    Args:
        groq_client: OpenAI-compatible Groq chat client.
        prompt (str): The prompt to send to the LLM.
        model (Type[BaseModel]): The Pydantic model to validate against.
        max_retries (int, optional): Number of retries on failure. Default is 3.
        delay (float, optional): Delay (in seconds) between retries, multiplied by attempt count.

    Returns:
        BaseModel: Validated Pydantic model instance if successful.
        dict: Contains 'error' and 'raw' fields if validation fails after retries.
    """
    # Bug fix: pre-initialize so the error path below never hits a NameError
    # when create() itself fails on the final attempt before response_text
    # (previously referenced via `'json_str' in locals()`) was ever assigned.
    response_text = None
    json_str = None
    for attempt in range(1, max_retries + 1):
        try:
            print(f"[call_llm_and_parse] Attempt {attempt}: sending prompt to LLM...")

            completion = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=800
            )

            response_text = completion.choices[0].message.content
            print(f"[call_llm_and_parse] Raw LLM response: {response_text[:200]}...")  # first 200 chars

            # Extract JSON (handle dirty or partial JSON)
            json_str = extract_and_repair_json(response_text)

            # Parse JSON using dirtyjson (tolerates trailing commas, etc.)
            parsed = dirtyjson.loads(json_str)

            # Validate with Pydantic
            validated = model.model_validate(parsed)

            print("[call_llm_and_parse] Successfully parsed and validated.")
            return validated

        except Exception as e:
            print(f"[Retry {attempt}] Error: {e}")
            if attempt < max_retries:
                # Linear backoff: delay, 2*delay, ...
                time.sleep(delay * attempt)
            else:
                print("[call_llm_and_parse] Failed after retries.")
                return {
                    "error": f"Validation failed after {max_retries} retries: {e}",
                    "raw": json_str if json_str is not None else response_text
                }
|
68 |
+
|
69 |
+
|
70 |
+
def extract_and_repair_json(text: str) -> str:
    """
    Extracts JSON starting from first '{' and balances braces.

    Raises:
        ValueError: when *text* contains no '{' at all.
    """
    start = text.find('{')
    if start == -1:
        raise ValueError("No JSON object found.")
    candidate = text[start:]
    # Append closers for any braces the model left unbalanced.
    missing = candidate.count('{') - candidate.count('}')
    if missing > 0:
        candidate += '}' * missing
    return candidate
|
profile_preprocessing.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict, Any
|
2 |
+
from urllib.parse import urlparse
|
3 |
+
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
def normalize_url(url):
    """Canonicalize a profile URL: trim whitespace and any trailing slashes."""
    return url.strip().rstrip('/')
|
6 |
+
|
7 |
+
def summarize_skills(skills: List[Dict]) -> str:
    """Comma-join the non-empty 'title' values from a list of skill dicts."""
    titles = (entry.get('title', '') for entry in skills)
    return ', '.join(t for t in titles if t)
|
9 |
+
|
10 |
+
def summarize_projects(projects: List[Dict]) -> str:
    """
    Build one line per project of the form '<title>: <description text>'.

    Description text is gathered from every 'textComponent' entry nested
    under the project's 'subComponents'; projects without any yield
    '<title>: ' with an empty body.
    """
    lines = []
    for project in projects:
        texts = [
            entry.get('text', '')
            for comp in (project.get('subComponents') or [])
            for entry in comp.get('description', [])
            if entry.get('type') == 'textComponent'
        ]
        blurb = ' '.join(texts).strip()
        lines.append(f"{project.get('title', '')}: {blurb}")
    return '\n'.join(lines)
|
22 |
+
|
23 |
+
def summarize_educations(educations: List[Dict]) -> str:
    """Comma-join 'title (subtitle, caption)' for each education entry with a title."""
    entries = []
    for edu in educations:
        if not edu.get('title'):
            continue
        entries.append(
            f"{edu.get('title', '')} ({edu.get('subtitle', '')}, {edu.get('caption', '')})"
        )
    return ', '.join(entries)
|
28 |
+
|
29 |
+
def summarize_certs(certs: List[Dict]) -> str:
    """Comma-join 'title (subtitle, caption)' for each certificate that has a title."""
    fmt = "{} ({}, {})".format
    return ', '.join(
        fmt(cert.get('title', ''), cert.get('subtitle', ''), cert.get('caption', ''))
        for cert in certs
        if cert.get('title')
    )
|
34 |
+
|
35 |
+
def summarize_test_scores(scores: List[Dict]) -> str:
    """Comma-join 'title (subtitle)' for each test score entry that has a title."""
    parts = []
    for score in scores:
        title = score.get('title')
        if title:
            parts.append(f"{title} ({score.get('subtitle', '')})")
    return ', '.join(parts)
|
40 |
+
|
41 |
+
def summarize_generic(items: List[Dict], key='title') -> str:
    """Comma-join the *key* field of every item where that field is truthy."""
    values = [item.get(key, '') for item in items]
    return ', '.join(value for value in values if value)
|
43 |
+
|
44 |
+
|
45 |
+
# === Preprocess raw profile into summarized profile ===
|
46 |
+
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """
    Flatten a raw scraped LinkedIn profile into a dict of summary strings.

    Scalar fields are copied through with "" as the fallback for missing
    keys; list-valued sections are condensed by the summarize_* helpers.
    Key order is fixed and matches the downstream state layout.
    """
    get = raw_profile.get  # every field read tolerates a missing key
    return {
        "FullName": get("fullName", ""),
        "profile_url": get("linkedinUrl", ""),
        "Headline": get("headline", ""),
        "JobTitle": get("jobTitle", ""),
        "CompanyName": get("companyName", ""),
        "CompanyIndustry": get("companyIndustry", ""),
        # Duration may arrive as a non-string; coerce for a uniform value type.
        "CurrentJobDuration": str(get("currentJobDuration", "")),
        "About": get("about", ""),
        "Experiences": summarize_generic(get("experiences", []), key='title'),
        "Skills": summarize_skills(get("skills", [])),
        "Educations": summarize_educations(get("educations", [])),
        "Certifications": summarize_certs(get("licenseAndCertificates", [])),
        "HonorsAndAwards": summarize_generic(get("honorsAndAwards", []), key='title'),
        "Verifications": summarize_generic(get("verifications", []), key='title'),
        "Highlights": summarize_generic(get("highlights", []), key='title'),
        "Projects": summarize_projects(get("projects", [])),
        "Publications": summarize_generic(get("publications", []), key='title'),
        "Patents": summarize_generic(get("patents", []), key='title'),
        "Courses": summarize_generic(get("courses", []), key='title'),
        "TestScores": summarize_test_scores(get("testScores", []))
    }
|
69 |
+
|
70 |
+
# === Create & fill state ===
|
71 |
+
|
72 |
+
|
73 |
+
def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the initial LangGraph chatbot state from a raw scraped profile.

    The returned dict carries:
    - the cleaned profile plus its normalized URL,
    - every profile section split out as a guaranteed-string entry,
    - placeholders that downstream tools populate later,
    - an empty chat history (list of {"role": ..., "content": ...} dicts).
    """
    # Clean / normalize the scraped profile first.
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    def section(key: str) -> str:
        # Coerce missing or None values to "" so sections are never None.
        return profile.get(key, "") or ""

    sections = {
        "about": section("About"),
        "headline": section("Headline"),
        "skills": section("Skills"),
        "projects": section("Projects"),
        "educations": section("Educations"),
        "certifications": section("Certifications"),
        "honors_and_awards": section("HonorsAndAwards"),
        "experiences": section("Experiences"),
        "publications": section("Publications"),
        "patents": section("Patents"),
        "courses": section("Courses"),
        "test_scores": section("TestScores"),
        "verifications": section("Verifications"),
        "highlights": section("Highlights"),
        "job_title": section("JobTitle"),
        "company_name": section("CompanyName"),
        "company_industry": section("CompanyIndustry"),
        "current_job_duration": section("CurrentJobDuration"),
        "full_name": section("FullName"),
    }

    state: Dict[str, Any] = {
        "profile": profile,
        "profile_url": normalize_url(profile.get("profile_url", "") or ""),
        "sections": sections,
        # === Placeholders populated by tools ===
        "enhanced_content": {},      # filled by the ContentGenerator tool
        "profile_analysis": None,    # Optional
        "job_fit": None,             # Optional
        "target_role": None,         # Optional[str]
        "editing_section": None,     # Optional[str]
        # === Chat history ===
        "messages": [],
        "next_tool_name": None,
    }
    return state
|
127 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
apify-client
|
2 |
+
python-dotenv
|
3 |
+
streamlit
|
4 |
+
langchain
|
5 |
+
openai
|
6 |
+
# python-dotenv  (duplicate pin — already listed above)
|
7 |
+
# langchain_openai  (duplicate — pip normalizes this to langchain-openai, listed below)
|
8 |
+
# Core Python packages
|
9 |
+
# python-dotenv  (duplicate pin — already listed above)
|
10 |
+
# streamlit  (duplicate pin — already listed above)
|
11 |
+
pydantic
|
12 |
+
|
13 |
+
# LangChain ecosystem
|
14 |
+
langchain-core
|
15 |
+
langchain-openai
|
16 |
+
langgraph
|
17 |
+
langgraph-checkpoint
|
18 |
+
|
19 |
+
# For OpenAI-compatible LLMs (Groq, etc.)
|
20 |
+
# openai  (duplicate pin — already listed above)
|
21 |
+
|
22 |
+
# For parsing "dirty" JSON
|
23 |
+
dirtyjson
|
24 |
+
|
25 |
+
typing-extensions
|
26 |
+
|
27 |
+
tqdm
|
28 |
+
|
29 |
+
|
30 |
+
|
31 |
+
|
scraping_profile.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from apify_client import ApifyClient
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
|
6 |
+
# Load environment variables
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
# Get API token
|
10 |
+
api_token = os.getenv("APIFY_API_TOKEN")
|
11 |
+
|
12 |
+
# Initialize client once (global)
|
13 |
+
client = ApifyClient(api_token)
|
14 |
+
|
15 |
+
|
16 |
+
def scrape_linkedin_profile(profile_url: str) -> dict:
    """
    📄 Scrapes a LinkedIn profile using Apify and returns the data as a Python dict.

    Side effect: the first dataset item is also written to
    'scraped_profile.json'. Returns {} on any failure or empty dataset.
    """
    try:
        run = client.actor("dev_fusion/Linkedin-Profile-Scraper").call(
            run_input={"profileUrls": [profile_url]}
        )
        items = list(client.dataset(run["defaultDatasetId"]).iterate_items())

        if not items:
            print("⚠️ No data found in dataset.")
            return {}

        profile_data = items[0]
        with open("scraped_profile.json", "w") as f:
            json.dump(profile_data, f, indent=2)
        return profile_data
    except Exception as e:
        # Best-effort scraper: log and return an empty profile on any error.
        print(f"❌ Error during scraping: {e}")
        return {}
|
36 |
+
|
37 |
+
|
38 |
+
# 🧪 OPTIONAL: test code only runs when this file is executed directly
if __name__ == "__main__":
    demo_url = "https://www.linkedin.com/in/sri-vallabh-tammireddy/"
    scraped = scrape_linkedin_profile(demo_url)
    print(json.dumps(scraped, indent=2))
|