Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- .gitattributes +3 -35
- .gitignore +1 -0
- Dockerfile +28 -0
- README.md +406 -0
- app.py +643 -0
- chatbot_model.py +130 -0
- llm_utils.py +83 -0
- profile_preprocessing.py +127 -0
- requirements.txt +31 -0
- scraping_profile.py +42 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
*.
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
# Auto detect text files and perform LF normalization
|
| 2 |
+
* text=auto
|
| 3 |
+
# *.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
# Use an official Python runtime as a parent image
|
| 4 |
+
FROM python:3.11-slim
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Set the working directory in the container
|
| 8 |
+
WORKDIR /app
|
| 9 |
+
|
| 10 |
+
ENV HF_HOME=/data/hf_cache
|
| 11 |
+
ENV TRANSFORMERS_CACHE=/data/hf_cache/transformers
|
| 12 |
+
ENV HF_DATASETS_CACHE=/data/hf_cache/datasets
|
| 13 |
+
ENV HF_HUB_CACHE=/data/hf_cache/hub
|
| 14 |
+
|
| 15 |
+
RUN mkdir -p /data/hf_cache/transformers /data/hf_cache/datasets /data/hf_cache/hub && chmod -R 777 /data/hf_cache
|
| 16 |
+
|
| 17 |
+
# Copy requirements.txt and install dependencies
|
| 18 |
+
COPY requirements.txt .
|
| 19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
+
|
| 21 |
+
# Copy the rest of your app's code
|
| 22 |
+
COPY . .
|
| 23 |
+
|
| 24 |
+
# Expose the port Streamlit runs on
|
| 25 |
+
EXPOSE 8501
|
| 26 |
+
|
| 27 |
+
# Run Streamlit
|
| 28 |
+
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Linkedin Assistant
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8501
|
| 8 |
+
tags:
|
| 9 |
+
- streamlit
|
| 10 |
+
pinned: false
|
| 11 |
+
short_description: Streamlit template space
|
| 12 |
+
license: mit
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# 🤖 LinkedIn AI Career Assistant
|
| 17 |
+
|
| 18 |
+
[](https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/)
|
| 19 |
+
[](https://www.python.org/downloads/)
|
| 20 |
+
[](https://streamlit.io/)
|
| 21 |
+
[](https://langchain-ai.github.io/langgraph/)
|
| 22 |
+
[](https://groq.com/)
|
| 23 |
+
|
| 24 |
+
An intelligent AI-powered career assistant that analyzes LinkedIn profiles, provides job fit analysis, and offers personalized career guidance through an interactive chat interface powered by Groq's llama3-8b-8192 model.
|
| 25 |
+
|
| 26 |
+
## 🚀 **Live Demo**
|
| 27 |
+
|
| 28 |
+
Try the application live at: **https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/**
|
| 29 |
+
|
| 30 |
+
## 📋 **Table of Contents**
|
| 31 |
+
|
| 32 |
+
- [Overview](#overview)
|
| 33 |
+
- [Key Features](#key-features)
|
| 34 |
+
- [Architecture](#architecture)
|
| 35 |
+
- [Installation](#installation)
|
| 36 |
+
- [Usage](#usage)
|
| 37 |
+
- [Technical Implementation](#technical-implementation)
|
| 38 |
+
- [API Keys Setup](#api-keys-setup)
|
| 39 |
+
- [Session Management](#session-management)
|
| 40 |
+
- [Contributing](#contributing)
|
| 41 |
+
- [License](#license)
|
| 42 |
+
|
| 43 |
+
## 🎯 **Overview**
|
| 44 |
+
|
| 45 |
+
The LinkedIn AI Career Assistant is a sophisticated career optimization tool that combines Groq's powerful llama3-8b-8192 model with LangGraph's multi-agent framework to provide comprehensive LinkedIn profile analysis. Built using **Streamlit**, **LangGraph**, and **Groq API**, this application offers an interactive chat-based experience for professional career development.
|
| 46 |
+
|
| 47 |
+
### **What Makes This Special?**
|
| 48 |
+
|
| 49 |
+
- **🧠 Multi-Agent AI System**: Utilizes LangGraph to orchestrate specialized AI tools for different analysis tasks
|
| 50 |
+
- **💾 Thread-Based Sessions**: Maintains conversation context with intelligent thread management based on LinkedIn URLs
|
| 51 |
+
- **🎯 Job Fit Analysis**: Provides detailed match scores and improvement suggestions for target roles
|
| 52 |
+
- **📊 Profile Analysis**: Comprehensive strengths and weaknesses assessment
|
| 53 |
+
- **🔄 Real-time Scraping**: Fetches live LinkedIn profile data using Apify integration
|
| 54 |
+
- **⚡ Groq-Powered**: Lightning-fast responses using Groq's optimized llama3-8b-8192 model
|
| 55 |
+
|
| 56 |
+
## 🌟 **Key Features**
|
| 57 |
+
|
| 58 |
+
### 1. **Interactive Chat Interface**
|
| 59 |
+
- **LinkedIn URL Input**: Simply paste your LinkedIn profile URL to get started
|
| 60 |
+
- **Conversational AI**: Natural language interaction for profile optimization
|
| 61 |
+
- **Real-time Analysis**: Instant feedback and suggestions as you chat
|
| 62 |
+
- **Custom Styling**: Modern chat bubble interface with professional design
|
| 63 |
+
|
| 64 |
+
### 2. **Comprehensive Profile Analysis**
|
| 65 |
+
- **Strengths Identification**: Highlights technical skills, projects, education, and soft skills
|
| 66 |
+
- **Weakness Detection**: Identifies gaps in technical skills, experience, and missing context
|
| 67 |
+
- **Actionable Suggestions**: Provides specific recommendations for profile enhancement
|
| 68 |
+
- **Section-by-Section Access**: Detailed extraction of individual LinkedIn profile sections
|
| 69 |
+
|
| 70 |
+
### 3. **Advanced Job Fit Analysis**
|
| 71 |
+
- **Match Score Calculation**: Quantifies how well your profile fits target roles (0-100%)
|
| 72 |
+
- **Skill Gap Analysis**: Identifies missing skills required for your target position
|
| 73 |
+
- **Role-Specific Feedback**: Tailored suggestions for improving job compatibility
|
| 74 |
+
- **Visual Score Display**: Circular progress indicators for match percentages
|
| 75 |
+
|
| 76 |
+
### 4. **Intelligent Session Management**
|
| 77 |
+
- **URL-Based Threading**: Automatically finds existing conversations for the same LinkedIn profile
|
| 78 |
+
- **Session Continuity**: Choose to continue previous chats or start fresh
|
| 79 |
+
- **SQLite Persistence**: Robust conversation storage with automatic checkpointing
|
| 80 |
+
- **Thread Isolation**: Secure separation of different user sessions
|
| 81 |
+
|
| 82 |
+
### 5. **Professional Data Handling**
|
| 83 |
+
- **Pydantic Validation**: Robust data validation using structured schemas
|
| 84 |
+
- **State Management**: Comprehensive state tracking across conversation flows
|
| 85 |
+
- **Error Handling**: Graceful handling of API failures and data parsing issues
|
| 86 |
+
- **Memory Optimization**: Efficient storage and retrieval of conversation context
|
| 87 |
+
|
| 88 |
+
## 🏗️ **Architecture**
|
| 89 |
+
|
| 90 |
+
### **Multi-Agent System Design**
|
| 91 |
+
|
| 92 |
+
```
|
| 93 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 94 |
+
│ User Interface (Streamlit) │
|
| 95 |
+
│ Custom Chat Interface │
|
| 96 |
+
└─────────────────────┬─���─────────────────────────────────────┘
|
| 97 |
+
│
|
| 98 |
+
┌─────────────────────┴───────────────────────────────────────┐
|
| 99 |
+
│ LangGraph Orchestrator │
|
| 100 |
+
│ (ChatbotState Schema) │
|
| 101 |
+
│ ┌─────────────────┬─────────────────┬─────────────────┐ │
|
| 102 |
+
│ │ Chatbot Node │ Profile Tool │ Job Match Tool │ │
|
| 103 |
+
│ │ (Router) │ (Analyzer) │ (Matcher) │ │
|
| 104 |
+
│ │ │ │ │ │
|
| 105 |
+
│ │ Extract Tool │ │ │ │
|
| 106 |
+
│ │ (Section Data) │ │ │ │
|
| 107 |
+
│ └─────────────────┴─────────────────┴─────────────────┘ │
|
| 108 |
+
└─────────────────────┬───────────────────────────────────────┘
|
| 109 |
+
│
|
| 110 |
+
┌─────────────────────┴───────────────────────────────────────┐
|
| 111 |
+
│ External Services │
|
| 112 |
+
│ ┌─────────────────┬─────────────────┬─────────────────┐ │
|
| 113 |
+
│ │ Apify LinkedIn │ Groq API │ SQLite │ │
|
| 114 |
+
│ │ Scraper │ (llama3-8b-8192)│ Checkpointer │ │
|
| 115 |
+
│ └─────────────────┴─────────────────┴─────────────────┘ │
|
| 116 |
+
└─────────────────────────────────────────────────────────────┘
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### **Core Components**
|
| 120 |
+
|
| 121 |
+
1. **ChatBot Node**: Main conversation router with tool calling capabilities
|
| 122 |
+
2. **Profile Analyzer**: Comprehensive profile evaluation for strengths and weaknesses
|
| 123 |
+
3. **Job Matcher**: Role compatibility analysis with scoring and suggestions
|
| 124 |
+
4. **Extract Tool**: Granular access to specific profile sections
|
| 125 |
+
5. **State Management**: Pydantic-based ChatbotState with comprehensive field tracking
|
| 126 |
+
6. **Thread System**: URL-based session identification and management
|
| 127 |
+
|
| 128 |
+
## 🛠️ **Installation**
|
| 129 |
+
|
| 130 |
+
### **Prerequisites**
|
| 131 |
+
|
| 132 |
+
- Python 3.8 or higher
|
| 133 |
+
- pip package manager
|
| 134 |
+
- Groq API key
|
| 135 |
+
- Apify API token
|
| 136 |
+
|
| 137 |
+
### **Quick Start**
|
| 138 |
+
|
| 139 |
+
1. **Clone the Repository**
|
| 140 |
+
```bash
|
| 141 |
+
git clone https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant.git
|
| 142 |
+
cd Linkedin-Profile-AI-Assistant
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
2. **Install Dependencies**
|
| 146 |
+
```bash
|
| 147 |
+
pip install -r requirements.txt
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
3. **Set Up Environment Variables**
|
| 151 |
+
```bash
|
| 152 |
+
cp .env.example .env
|
| 153 |
+
# Edit .env with your API keys
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
4. **Run the Application**
|
| 157 |
+
```bash
|
| 158 |
+
streamlit run app.py
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
5. **Access the Application**
|
| 162 |
+
```
|
| 163 |
+
Open your browser and go to: http://localhost:8501
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
### **Requirements**
|
| 167 |
+
|
| 168 |
+
```txt
|
| 169 |
+
streamlit>=1.28.0
|
| 170 |
+
langchain>=0.0.350
|
| 171 |
+
langchain-openai>=0.0.8
|
| 172 |
+
langgraph>=0.0.55
|
| 173 |
+
openai>=1.3.0
|
| 174 |
+
pydantic>=2.0.0
|
| 175 |
+
python-dotenv>=1.0.0
|
| 176 |
+
apify-client>=1.0.0
|
| 177 |
+
dirtyjson>=1.0.8
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
## 📖 **Usage**
|
| 181 |
+
|
| 182 |
+
### **Getting Started**
|
| 183 |
+
|
| 184 |
+
1. **Launch the Application**
|
| 185 |
+
- Open the application in your browser
|
| 186 |
+
- You'll see the main interface with a LinkedIn URL input field
|
| 187 |
+
|
| 188 |
+
2. **Enter Your LinkedIn Profile**
|
| 189 |
+
- Paste your LinkedIn profile URL (e.g., `https://www.linkedin.com/in/your-profile/`)
|
| 190 |
+
- The system will automatically scrape and analyze your profile
|
| 191 |
+
|
| 192 |
+
3. **Choose Session Mode**
|
| 193 |
+
- If a previous session exists, choose to continue or start fresh
|
| 194 |
+
- New sessions initialize with full profile preprocessing
|
| 195 |
+
|
| 196 |
+
4. **Start Chatting**
|
| 197 |
+
- Begin conversations with queries like:
|
| 198 |
+
- "Analyze my profile strengths and weaknesses"
|
| 199 |
+
- "I want to apply for a Data Scientist role"
|
| 200 |
+
- "Show me my about section"
|
| 201 |
+
- "What skills am I missing for a Software Engineer position?"
|
| 202 |
+
|
| 203 |
+
### **Available Commands**
|
| 204 |
+
|
| 205 |
+
- **Profile Analysis**: "Analyze my profile" - Full strengths/weaknesses analysis
|
| 206 |
+
- **Job Matching**: "I want to apply for [role]" - Match score and skill gaps
|
| 207 |
+
- **Section Access**: "Show me my [section]" - Extract specific profile sections
|
| 208 |
+
- **General Queries**: Ask any career-related questions for guidance
|
| 209 |
+
|
| 210 |
+
### **Sample Conversations**
|
| 211 |
+
|
| 212 |
+
```
|
| 213 |
+
User: "Analyze my LinkedIn profile"
|
| 214 |
+
AI: ✅ Profile analysis complete!
|
| 215 |
+
|
| 216 |
+
💪 Strengths
|
| 217 |
+
- Technical: Python, Machine Learning, Data Analysis
|
| 218 |
+
- Projects: E-commerce recommendation system, Stock prediction model
|
| 219 |
+
- Education: Computer Science degree, Data Science certification
|
| 220 |
+
- Soft Skills: Problem-solving, Team collaboration
|
| 221 |
+
|
| 222 |
+
⚠️ Weaknesses
|
| 223 |
+
- Technical Gaps: Cloud computing platforms, MLOps tools
|
| 224 |
+
- Project/Experience Gaps: Limited production deployment experience
|
| 225 |
+
- Missing Context: Quantified project impacts and metrics
|
| 226 |
+
|
| 227 |
+
🛠 Suggestions to improve
|
| 228 |
+
- Add AWS/Azure cloud certifications
|
| 229 |
+
- Include specific metrics for project outcomes
|
| 230 |
+
- Highlight leadership or mentoring experiences
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
```
|
| 234 |
+
User: "I want to apply for a Senior Data Scientist role"
|
| 235 |
+
AI: 📊 Job Fit Analysis
|
| 236 |
+
|
| 237 |
+
🎯 Target Role: Senior Data Scientist
|
| 238 |
+
Match Score: 78%
|
| 239 |
+
|
| 240 |
+
Missing Skills:
|
| 241 |
+
• Deep Learning frameworks (TensorFlow, PyTorch)
|
| 242 |
+
• MLOps and model deployment
|
| 243 |
+
• Leadership and team management experience
|
| 244 |
+
|
| 245 |
+
Suggestions:
|
| 246 |
+
• Complete online courses in deep learning
|
| 247 |
+
• Build projects showcasing end-to-end ML pipelines
|
| 248 |
+
• Seek opportunities to lead junior team members
|
| 249 |
+
```
|
| 250 |
+
|
| 251 |
+
## 🔧 **Technical Implementation**
|
| 252 |
+
|
| 253 |
+
### **State Management**
|
| 254 |
+
|
| 255 |
+
The application uses a sophisticated Pydantic-based state management system:
|
| 256 |
+
|
| 257 |
+
```python
|
| 258 |
+
class ChatbotState(BaseModel):
|
| 259 |
+
profile: Dict[str, Any] # Processed LinkedIn profile data
|
| 260 |
+
profile_url: Optional[str] # Original LinkedIn URL
|
| 261 |
+
sections: Dict[str, str] # Individual profile sections
|
| 262 |
+
enhanced_content: Dict[str, str] # Future AI-generated improvements
|
| 263 |
+
profile_analysis: Optional[Dict[str, Any]] # Strengths/weaknesses
|
| 264 |
+
job_fit: Optional[Dict[str, Any]] # Job matching results
|
| 265 |
+
target_role: Optional[str] # User's target job role
|
| 266 |
+
messages: Annotated[List[BaseMessage], add_messages] # Chat history
|
| 267 |
+
next_tool_name: Optional[str] # Tool routing information
|
| 268 |
+
```
|
| 269 |
+
|
| 270 |
+
### **Tool Integration**
|
| 271 |
+
|
| 272 |
+
The system includes three specialized tools:
|
| 273 |
+
|
| 274 |
+
1. **Profile Analyzer Tool**:
|
| 275 |
+
- Comprehensive profile evaluation
|
| 276 |
+
- Structured output with strengths, weaknesses, suggestions
|
| 277 |
+
- Uses ProfileAnalysisModel for validation
|
| 278 |
+
|
| 279 |
+
2. **Job Matcher Tool**:
|
| 280 |
+
- Role-specific compatibility analysis
|
| 281 |
+
- Calculates match scores (0-100%)
|
| 282 |
+
- Identifies missing skills and provides suggestions
|
| 283 |
+
|
| 284 |
+
3. **Extract Tool**:
|
| 285 |
+
- Granular access to profile sections
|
| 286 |
+
- Supports nested data extraction with dot notation
|
| 287 |
+
- Returns structured results for specific queries
|
| 288 |
+
|
| 289 |
+
### **Session Architecture**
|
| 290 |
+
|
| 291 |
+
- **Thread Management**: URL-based thread identification for session continuity
|
| 292 |
+
- **Checkpointing**: SQLite-based persistent storage with automatic fallback
|
| 293 |
+
- **State Validation**: Comprehensive Pydantic validation for data integrity
|
| 294 |
+
- **Memory Optimization**: Efficient message history management
|
| 295 |
+
|
| 296 |
+
### **LLM Integration**
|
| 297 |
+
|
| 298 |
+
- **Model**: Groq's llama3-8b-8192 for fast, high-quality responses
|
| 299 |
+
- **API**: OpenAI-compatible interface through Groq
|
| 300 |
+
- **Tool Calling**: Native support for structured tool invocation
|
| 301 |
+
- **Error Handling**: Robust retry mechanisms and graceful degradation
|
| 302 |
+
|
| 303 |
+
## 🔑 **API Keys Setup**
|
| 304 |
+
|
| 305 |
+
Create a `.env` file in the root directory:
|
| 306 |
+
|
| 307 |
+
```env
|
| 308 |
+
# Groq API Key (required)
|
| 309 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 310 |
+
|
| 311 |
+
# Apify API Token (required for LinkedIn scraping)
|
| 312 |
+
APIFY_API_TOKEN=your_apify_token_here
|
| 313 |
+
```
|
| 314 |
+
|
| 315 |
+
### **Getting API Keys**
|
| 316 |
+
|
| 317 |
+
1. **Groq API Key**:
|
| 318 |
+
- Visit [Groq Console](https://console.groq.com/)
|
| 319 |
+
- Create an account and generate an API key
|
| 320 |
+
- Used for llama3-8b-8192 model inference
|
| 321 |
+
|
| 322 |
+
2. **Apify API Token**:
|
| 323 |
+
- Go to [Apify Console](https://console.apify.com/)
|
| 324 |
+
- Sign up and get your API token
|
| 325 |
+
- Used for LinkedIn profile scraping
|
| 326 |
+
|
| 327 |
+
## 💾 **Session Management**
|
| 328 |
+
|
| 329 |
+
The application implements intelligent session management:
|
| 330 |
+
|
| 331 |
+
### **Thread-Based System**
|
| 332 |
+
- Each LinkedIn profile URL gets a unique thread ID
|
| 333 |
+
- Automatic detection of existing conversations for the same profile
|
| 334 |
+
- Secure isolation between different user sessions
|
| 335 |
+
|
| 336 |
+
### **Conversation Persistence**
|
| 337 |
+
- SQLite-based storage for production environments
|
| 338 |
+
- Memory-based fallback for development/testing
|
| 339 |
+
- Automatic checkpointing after each interaction
|
| 340 |
+
- Recovery capability in case of interruptions
|
| 341 |
+
|
| 342 |
+
### **User Experience**
|
| 343 |
+
- Choice to continue previous conversations or start fresh
|
| 344 |
+
- Seamless transition between sessions
|
| 345 |
+
- Maintained conversation context across browser refreshes
|
| 346 |
+
|
| 347 |
+
## 🤝 **Contributing**
|
| 348 |
+
|
| 349 |
+
We welcome contributions to improve the LinkedIn AI Career Assistant! Here's how you can help:
|
| 350 |
+
|
| 351 |
+
### **Development Setup**
|
| 352 |
+
|
| 353 |
+
1. Fork the repository
|
| 354 |
+
2. Create a feature branch: `git checkout -b feature/your-feature-name`
|
| 355 |
+
3. Make your changes and test thoroughly
|
| 356 |
+
4. Submit a pull request with a clear description
|
| 357 |
+
|
| 358 |
+
### **Areas for Contribution**
|
| 359 |
+
|
| 360 |
+
- **Tool Enhancement**: Implement the commented-out content_generator tool
|
| 361 |
+
- **UI/UX Improvements**: Enhance the Streamlit interface design
|
| 362 |
+
- **Performance Optimization**: Improve response times and resource usage
|
| 363 |
+
- **Testing**: Add comprehensive test coverage
|
| 364 |
+
- **Documentation**: Expand examples and API documentation
|
| 365 |
+
|
| 366 |
+
### **Code Style**
|
| 367 |
+
|
| 368 |
+
- Follow PEP 8 guidelines for Python code
|
| 369 |
+
- Use meaningful variable and function names
|
| 370 |
+
- Add docstrings for all functions and classes
|
| 371 |
+
- Include type hints where appropriate
|
| 372 |
+
- Validate data models with Pydantic
|
| 373 |
+
|
| 374 |
+
## 📝 **License**
|
| 375 |
+
|
| 376 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 377 |
+
|
| 378 |
+
## 🙏 **Acknowledgments**
|
| 379 |
+
|
| 380 |
+
- **Groq** for providing fast and efficient LLM inference
|
| 381 |
+
- **LangChain/LangGraph** for the multi-agent framework
|
| 382 |
+
- **Streamlit** for the web application framework
|
| 383 |
+
- **Apify** for LinkedIn scraping capabilities
|
| 384 |
+
- **Hugging Face** for hosting the live demo
|
| 385 |
+
|
| 386 |
+
## 📞 **Support**
|
| 387 |
+
|
| 388 |
+
For questions, issues, or suggestions:
|
| 389 |
+
|
| 390 |
+
- **Create an Issue**: [GitHub Issues](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/issues)
|
| 391 |
+
- **Discussions**: [GitHub Discussions](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/discussions)
|
| 392 |
+
- **Email**: [email protected]
|
| 393 |
+
|
| 394 |
+
## 🔄 **Recent Updates**
|
| 395 |
+
|
| 396 |
+
- **v2.0**: Migrated to Groq API for faster inference
|
| 397 |
+
- **Thread Management**: Implemented URL-based session tracking
|
| 398 |
+
- **Enhanced UI**: Custom chat interface with professional styling
|
| 399 |
+
- **Robust State**: Pydantic-based data validation and error handling
|
| 400 |
+
- **Tool Optimization**: Streamlined to three core analysis tools
|
| 401 |
+
|
| 402 |
+
---
|
| 403 |
+
|
| 404 |
+
**Built with ❤️ by Sri Vallabh**
|
| 405 |
+
|
| 406 |
+
*Empowering professionals to optimize their LinkedIn presence and advance their careers through AI-powered insights.*
|
app.py
ADDED
|
@@ -0,0 +1,643 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
import time
|
| 5 |
+
from typing import Dict, Any, List, Optional, Annotated
|
| 6 |
+
from chatbot_model import (
|
| 7 |
+
UserMemory,
|
| 8 |
+
ChatbotState,
|
| 9 |
+
ProfileAnalysisModel,
|
| 10 |
+
JobFitModel,
|
| 11 |
+
ContentGenerationModel,
|
| 12 |
+
|
| 13 |
+
)
|
| 14 |
+
from llm_utils import call_llm_and_parse
|
| 15 |
+
from profile_preprocessing import (
|
| 16 |
+
preprocess_profile,
|
| 17 |
+
initialize_state,
|
| 18 |
+
normalize_url
|
| 19 |
+
)
|
| 20 |
+
from openai import OpenAI
|
| 21 |
+
import streamlit as st
|
| 22 |
+
import hashlib
|
| 23 |
+
from dotenv import load_dotenv
|
| 24 |
+
from pydantic import BaseModel, Field,ValidationError
|
| 25 |
+
# import pdb; pdb.set_trace()
|
| 26 |
+
from scraping_profile import scrape_linkedin_profile
|
| 27 |
+
from langchain_openai import ChatOpenAI
|
| 28 |
+
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage,BaseMessage,ToolMessage
|
| 29 |
+
from langchain_core.tools import tool
|
| 30 |
+
from langgraph.graph import StateGraph, END,START
|
| 31 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 32 |
+
from langgraph.graph import add_messages # if your framework exposes this
|
| 33 |
+
from langgraph.prebuilt import ToolNode,tools_condition,InjectedState
|
| 34 |
+
import dirtyjson
|
| 35 |
+
import sqlite3
|
| 36 |
+
try:
|
| 37 |
+
from langgraph.checkpoint.sqlite import SqliteSaver
|
| 38 |
+
SQLITE_AVAILABLE = True
|
| 39 |
+
except ImportError:
|
| 40 |
+
SQLITE_AVAILABLE = False
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# ========== 1. ENVIRONMENT & LLM SETUP ==========
|
| 45 |
+
load_dotenv()
|
| 46 |
+
groq_key = os.getenv("GROQ_API_KEY")
|
| 47 |
+
assert groq_key, "GROQ_API_KEY not found in environment!"
|
| 48 |
+
groq_client=OpenAI(
|
| 49 |
+
api_key=os.getenv("GROQ_API_KEY"),
|
| 50 |
+
base_url="https://api.groq.com/openai/v1"
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
def normalize_url(url):
|
| 54 |
+
return url.strip().rstrip('/')
|
| 55 |
+
|
| 56 |
+
def validate_state(state: dict) -> None:
|
| 57 |
+
"""
|
| 58 |
+
Validate given state dict against ChatbotState schema.
|
| 59 |
+
Displays result in Streamlit instead of printing.
|
| 60 |
+
"""
|
| 61 |
+
# st.write("=== Validating chatbot state ===")
|
| 62 |
+
try:
|
| 63 |
+
ChatbotState.model_validate(state)
|
| 64 |
+
# st.success("✅ State is valid!")
|
| 65 |
+
except ValidationError as e:
|
| 66 |
+
st.error("❌ Validation failed!")
|
| 67 |
+
errors_list = []
|
| 68 |
+
for error in e.errors():
|
| 69 |
+
loc = " → ".join(str(item) for item in error['loc'])
|
| 70 |
+
msg = error['msg']
|
| 71 |
+
errors_list.append(f"- At: {loc}\n Error: {msg}")
|
| 72 |
+
st.write("\n".join(errors_list))
|
| 73 |
+
# Optionally show raw validation error too:
|
| 74 |
+
st.expander("See raw validation error").write(str(e))
|
| 75 |
+
st.stop()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
user_memory = UserMemory()
|
| 79 |
+
|
| 80 |
+
# ========== 7. AGENT FUNCTIONS ==========
|
| 81 |
+
|
| 82 |
+
def profile_analysis_prompt(profile: Dict[str, str]) -> str:
    """Build the LLM prompt requesting a structured (JSON-only) analysis of
    a full profile.

    Args:
        profile: Flattened profile fields (FullName, Headline, ...); missing
            keys render as empty strings so the prompt is always well-formed.

    Returns:
        Prompt string instructing the model to reply with JSON matching
        ProfileAnalysisModel (strengths / weaknesses / suggestions).
    """
    return f"""
You are a top-tier LinkedIn career coach and AI analyst.

Analyze the following candidate profile carefully.

Candidate profile data:
FullName: {profile.get("FullName", "")}
Headline: {profile.get("Headline", "")}
JobTitle: {profile.get("JobTitle", "")}
CompanyName: {profile.get("CompanyName", "")}
CompanyIndustry: {profile.get("CompanyIndustry", "")}
CurrentJobDuration: {profile.get("CurrentJobDuration", "")}
About: {profile.get("About", "")}
Experiences: {profile.get("Experiences", "")}
Skills: {profile.get("Skills", "")}
Educations: {profile.get("Educations", "")}
Certifications: {profile.get("Certifications", "")}
HonorsAndAwards: {profile.get("HonorsAndAwards", "")}
Verifications: {profile.get("Verifications", "")}
Highlights: {profile.get("Highlights", "")}
Projects: {profile.get("Projects", "")}
Publications: {profile.get("Publications", "")}
Patents: {profile.get("Patents", "")}
Courses: {profile.get("Courses", "")}
TestScores: {profile.get("TestScores", "")}


Identify and summarize:
1. strengths:
- technical strengths (skills, tools, frameworks)
- project strengths (impactful projects, innovation)
- educational strengths (degrees, certifications, awards)
- soft skills and personality traits (teamwork, leadership)
2. weaknesses:
- missing or weak technical skills
- gaps in projects, experience, or education
- unclear profile sections or missing context
3. actionable suggestions:
- concrete ways to improve profile headline, about section, or add projects
- suggestions to learn or highlight new skills
- ideas to make the profile more attractive for recruiters

Important instructions:
- Respond ONLY with valid JSON.
- Do NOT include text before or after JSON.
- Be concise but detailed.


Example JSON format:
{{
"strengths": {{
"technical": ["...", "..."],
"projects": ["...", "..."],
"education": ["...", "..."],
"soft_skills": ["...", "..."]
}},
"weaknesses": {{
"technical_gaps": ["...", "..."],
"project_or_experience_gaps": ["...", "..."],
"missing_context": ["...", "..."]
}},
"suggestions": [
"...",
"...",
"..."
]
}}
""".strip()
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def job_fit_prompt(sections: Dict[str, str], target_role: str) -> str:
    """Build the LLM prompt comparing profile sections against `target_role`.

    Args:
        sections: Flattened profile sections (headline, about, skills, ...);
            missing keys render as empty strings.
        target_role: Job title the candidate is being evaluated against.

    Returns:
        Prompt string instructing the model to reply with JSON matching
        JobFitModel (match_score 0-100, missing_skills, suggestions).
    """
    return f"""
You are an expert career coach and recruiter.

Compare the following candidate profile against the typical requirements for the role of "{target_role}".

Candidate Profile:
- Headline: {sections.get('headline', '')}
- About: {sections.get('about', '')}
- Job Title: {sections.get('job_title', '')}
- Company: {sections.get('company_name', '')}
- Industry: {sections.get('company_industry', '')}
- Current Job Duration: {sections.get('current_job_duration', '')}
- Skills: {sections.get('skills', '')}
- Projects: {sections.get('projects', '')}
- Educations: {sections.get('educations', '')}
- Certifications: {sections.get('certifications', '')}
- Honors & Awards: {sections.get('honors_and_awards', '')}
- Experiences: {sections.get('experiences', '')}

**Instructions:**
- Respond ONLY with valid JSON.
- Your JSON must exactly match the following schema:
{{
"match_score": 85,
"missing_skills": ["Skill1", "Skill2"],
"suggestions": ["...", "...", "..."]
}}
- "match_score": integer from 0–100 estimating how well the profile fits the target role.
- "missing_skills": key missing or weakly mentioned skills.
- "suggestions": 3 actionable recommendations to improve fit (e.g., learn tools, rewrite headline).

Do NOT include explanations, text outside JSON, or markdown.
Start with '{{' and end with '}}'.
The JSON must be directly parseable.
""".strip()
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
# --- Tool: Profile Analyzer ---
|
| 195 |
+
@tool
def profile_analyzer(state: Annotated[ChatbotState, InjectedState]) -> dict:
    """
    Tool: Analyze the overall full user's profile to give strengths, weaknesses, suggestions.
    This is needed only if full analysis of profile is needed.
    Returns the full analysis in the form of a json.

    - It takes no arguments
    """
    # Summarized profile fields; coalesce a falsy attribute to an empty dict.
    profile_data = getattr(state, "profile", {}) or {}

    # Ask the LLM for a structured analysis, validated against the schema.
    analysis_result = call_llm_and_parse(
        groq_client, profile_analysis_prompt(profile_data), ProfileAnalysisModel
    )
    analysis = analysis_result.model_dump()

    # Persist on both the graph state and the in-process memory store.
    state.profile_analysis = analysis
    user_memory.save("profile_analysis", analysis)

    print("💾 [DEBUG] Saved analysis to user memory.")
    print("📦 [DEBUG] Updated state.profile_analysis with analysis.")

    return analysis
|
| 224 |
+
|
| 225 |
+
# --- Tool: Job Matcher ---
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
@tool
def job_matcher(
    state: Annotated[ChatbotState, InjectedState],
    target_role: str = None
) -> dict:
    """
    Tool: Analyze how well the user's profile fits the target role.
    - If user is asking if he is a good fit for a certain role, or needs to see if his profile is compatible with a certain role, call this.
    - Takes target_role as an argument.
    - this tool is needed when match score, missing skills, suggestions are needed based on a job name given.
    """
    print(f"target role is {target_role}")

    # Flattened profile sections drive the comparison prompt.
    sections = getattr(state, "sections", {})

    prompt = job_fit_prompt(sections, target_role)

    # Call the LLM and parse; on any failure fall back to an explicit
    # zero-score result rather than crashing the tool node.
    try:
        job_fit_model = call_llm_and_parse(groq_client, prompt, JobFitModel)
        job_fit_dict = job_fit_model.model_dump()
        job_fit_dict["target_role"] = target_role
    except Exception as e:
        # Previously this exception was swallowed silently; log it so
        # parse/LLM failures remain debuggable.
        print(f"[job_matcher] LLM call/parse failed: {e}")
        job_fit_dict = {
            "target_role": target_role,
            "match_score": 0,
            "missing_skills": [],
            "suggestions": ["Parsing failed or incomplete response."]
        }

    # Save to state and user memory
    state.job_fit = job_fit_dict
    user_memory.save("job_fit", job_fit_dict)

    return job_fit_dict
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
@tool
def extract_from_state_tool(
    state: Annotated[ChatbotState, InjectedState],
    key: str
) -> dict:
    """
    This tool is used if user wants to ask about any particular part of this profile. Use this if a single section is targeted. It expects key as an argument, that represents what
    the user is wanting to look at, from his profile.
    Argument:
    key: only pass one from the below list, identify one thing the user wants to look into and choose that:
    "sections.about", "sections.headline", "sections.skills", "sections.projects",
    "sections.educations", "sections.certifications", "sections.honors_and_awards",
    "sections.experiences", "sections.publications", "sections.patents",
    "sections.courses", "sections.test_scores", "sections.verifications",
    "sections.highlights", "sections.job_title", "sections.company_name",
    "sections.company_industry", "sections.current_job_duration", "sections.full_name",
    "enhanced_content", "profile_analysis", "job_fit", "target_role", "editing_section"
    """
    # Walk the dotted path one segment at a time, supporting both plain
    # dicts and Pydantic model attributes at each level.
    value = state
    try:
        for part in key.split('.'):
            if isinstance(value, dict):
                value = value.get(part)
            elif hasattr(value, part):
                value = getattr(value, part)
            else:
                value = None
            # Stop early once a segment resolves to nothing.
            if value is None:
                break
    except Exception:
        # Defensive: a malformed key must never crash the tool node.
        value = None
    return {"result": value}
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
# Tools exposed to the chat model; ToolNode executes whichever one the
# LLM selects.
tools = [
    profile_analyzer,
    job_matcher,
    extract_from_state_tool
]
# Chat model via Groq's OpenAI-compatible endpoint; temperature 0 for
# deterministic tool routing.
llm = ChatOpenAI(
    api_key=groq_key,
    base_url="https://api.groq.com/openai/v1",
    model="llama3-8b-8192",
    temperature=0
)
llm_with_tools = llm.bind_tools(tools)
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
# ========== 8. LANGGRAPH PIPELINE ==========
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def chatbot_node(state: ChatbotState) -> ChatbotState:
    """
    LangGraph node: run the tool-enabled LLM over the recent conversation.

    Validates the incoming state, sends the system prompt plus only the two
    most recent messages (to bound token usage), appends the model's reply
    (which may contain tool calls) to state.messages, and returns the
    mutated state. Routing to the tools node is decided downstream by
    tools_condition.
    """
    validate_state(state)

    messages = state.get("messages", [])

    system_prompt = """
You are a helpful AI assistant specialized in LinkedIn profile coaching.

You can:
- Answer user questions.
- If user is greeting , greet him back also telling how you can help him.
- You should proactively use specialized tools whenever possible to give richer, data-driven answers.
IMPORTANT RULES:
- You must call at most one tool at a time.
- Never call multiple tools together in the same step.
- If user asks to show any section, use extract_from_state_tool, and after that, show the exact result from it.
- If information about that section is already known, dont call extract_from_state_tool, directly answer the user query.
- call profile_analyzer function only when full profile analysis is needed, otherwise rely on extract_from_state_tool.
- If user asks to enhance any section, check if it is there in history, otherwise call extract_from_state_tool first.
- Prefer to call a tool when answering instead of directly replying, especially if it can add new, useful insights or up-to-date data.
- If a tool has been recently used and new info isn’t needed, you may answer directly.
- Use tools to verify assumptions, enrich answers, or when the user asks about strengths, weaknesses, job fit, or wants improvements.

Always respond helpfully, clearly, and with actionable advice to guide the user in improving their LinkedIn profile.
"""

    # Only the system prompt plus the last two turns are forwarded.
    messages = [SystemMessage(content=system_prompt)] + messages[-2:]
    response = llm_with_tools.invoke(messages)

    # Debug: surface which tool (if any) the model decided to call.
    # Tool calls may be dicts or objects depending on the client version.
    if hasattr(response, "tool_calls") and response.tool_calls:
        first_tool = response.tool_calls[0]
        tool_name = first_tool.get("name") if isinstance(first_tool, dict) else getattr(first_tool, "name", None)
        print(f"[DEBBBBUUUUGGG] using tool {tool_name}")

    print("[DEBUG] LLM response:", response)
    state.setdefault("messages", []).append(response)

    return state
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
# --- Graph definition ---
# chatbot -> (tools_condition) -> tools -> chatbot loop; the run ends when
# the LLM replies without tool calls (tools_condition then routes to END).
graph = StateGraph(state_schema=ChatbotState)
graph.add_node("chatbot", chatbot_node)
graph.add_node("tools", ToolNode(tools))
graph.add_edge(START, "chatbot")
# Route to "tools" when the last AI message contains tool calls, else END.
graph.add_conditional_edges("chatbot", tools_condition)
graph.add_edge("tools","chatbot")
# NOTE(review): redundant with the explicit START -> "chatbot" edge above.
graph.set_entry_point("chatbot")
|
| 378 |
+
|
| 379 |
+
# --- Streamlit UI ---
|
| 380 |
+
st.set_page_config(page_title="💼 LinkedIn AI Career Assistant", page_icon="🤖", layout="wide")
|
| 381 |
+
st.title("🧑💼 LinkedIn AI Career Assistant")
|
| 382 |
+
|
| 383 |
+
# --- Checkpointer and graph initialization ---
# Persist conversation checkpoints across Streamlit reruns: prefer SQLite
# when the optional langgraph sqlite extra is installed, else in-memory.
if "checkpointer" not in st.session_state:
    if SQLITE_AVAILABLE:
        # check_same_thread=False: Streamlit may touch the connection from
        # different threads across reruns.
        conn = sqlite3.connect("checkpoints1.db", check_same_thread=False)
        st.session_state["checkpointer"] = SqliteSaver(conn)
    else:
        st.session_state["checkpointer"] = MemorySaver()
checkpointer = st.session_state["checkpointer"]

# Compile the graph once per session and cache it.
if "app_graph" not in st.session_state:
    st.session_state["app_graph"] = graph.compile(checkpointer=checkpointer)
app_graph = st.session_state["app_graph"]
|
| 395 |
+
# Find or create thread
|
| 396 |
+
def find_thread_id_for_url(checkpointer, url, max_threads=100):
    """
    Scan checkpointed threads 0..max_threads-1 for one whose stored
    profile_url matches `url` (both sides normalized).

    Returns (thread_id, channel_values) on a hit, else (None, None).
    """
    target = normalize_url(url)
    for candidate in range(max_threads):
        cfg = {"configurable": {"thread_id": str(candidate), "checkpoint_ns": ""}}
        checkpoint = checkpointer.get(cfg)
        if not checkpoint or "channel_values" not in checkpoint:
            continue
        values = checkpoint["channel_values"]
        if normalize_url(values.get("profile_url", "")) == target:
            return str(candidate), values
    return None, None
|
| 407 |
+
|
| 408 |
+
def delete_thread_checkpoint(checkpointer, thread_id):
    """
    Remove all checkpoints for `thread_id`, when the backend supports it.

    SqliteSaver exposes delete_thread; checkpointers without that method
    are silently left untouched (best-effort cleanup).
    """
    deleter = getattr(checkpointer, "delete_thread", None)
    if deleter is not None:
        deleter(thread_id)
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
def get_next_thread_id(checkpointer, max_threads=100):
    """
    Return the smallest unused thread id in [0, max_threads) as a string.

    A thread counts as used when the checkpointer holds any state for it.
    Raises RuntimeError when every slot is taken.
    """
    used = {
        tid
        for tid in range(max_threads)
        if checkpointer.get({"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}})
    }
    for tid in range(max_threads):
        if tid not in used:
            return str(tid)
    raise RuntimeError("No available thread_id")
|
| 427 |
+
|
| 428 |
+
# --- Session selection and state initialization ---

# First run only: ask for a profile URL, then either resume an existing
# checkpointed session for that URL or scrape the profile and start fresh.
if "chat_mode" not in st.session_state:
    profile_url = st.text_input("Profile URL (e.g., https://www.linkedin.com/in/username/)")
    if not profile_url:
        st.info("Please enter a valid LinkedIn profile URL above to start.")
        st.stop()

    # Accept only canonical /in/<username>/ profile URLs.
    valid_pattern = r"^https://www\.linkedin\.com/in/[^/]+/?$"
    if not re.match(valid_pattern, profile_url.strip()):
        st.error("❌ Invalid LinkedIn profile URL. Make sure it matches the format.")
        st.stop()
    url = profile_url.strip()

    existing_thread_id, previous_state = find_thread_id_for_url(checkpointer, url)
    # Defensive: ensure required fields
    required_fields = ["profile", "sections"]
    if previous_state and not all(f in previous_state and previous_state[f] for f in required_fields):
        st.warning("Previous session is missing required data. Please start a new chat.")
        previous_state = None

    if previous_state:
        st.info("A previous session found. Choose:")
        col1, col2 = st.columns(2)
        if col1.button("Continue previous chat"):
            # Resume: reuse the stored thread and its checkpointed state.
            st.session_state["chat_mode"] = "continue"
            st.session_state["thread_id"] = existing_thread_id
            st.session_state.state = previous_state
            st.rerun()
        elif col2.button("Start new chat"):
            # Restart: drop the stale checkpoint, re-scrape, reuse the slot.
            delete_thread_checkpoint(checkpointer, existing_thread_id)
            with st.spinner("Fetching and processing profile... ⏳"):
                raw=scrape_linkedin_profile(url)
            thread_id = existing_thread_id
            st.session_state["chat_mode"] = "new"
            st.session_state["thread_id"] = thread_id
            st.session_state.state = initialize_state(raw)
            st.session_state.state["profile_url"] = normalize_url(url)
            st.session_state.state["messages"] = []
            st.rerun()
        # Wait here until one of the two buttons is pressed.
        st.stop()
    else:
        # No prior session for this URL: scrape and allocate a new thread.
        with st.spinner("Fetching and processing profile... ⏳"):
            raw=scrape_linkedin_profile(url)
        thread_id = get_next_thread_id(checkpointer)
        st.session_state["thread_id"] = thread_id
        st.session_state["chat_mode"] = "new"
        st.session_state.state = initialize_state(raw)
        st.session_state.state["profile_url"] = normalize_url(url)
        st.session_state.state["messages"] = []
        st.rerun()
|
| 479 |
+
|
| 480 |
+
# --- Main chat UI (only after chat_mode is set) ---
# Pull the per-session graph state and thread id established above.
state = st.session_state.state
thread_id = st.session_state.get("thread_id")

st.subheader("💬 Chat with your AI Assistant")
messages = state.get("messages", [])
chat_container = st.container()
|
| 487 |
+
|
| 488 |
+
with chat_container:
|
| 489 |
+
st.markdown(
|
| 490 |
+
"""
|
| 491 |
+
<style>
|
| 492 |
+
.chat-row { display: flex; width: 100%; margin-bottom: 12px; animation: fadeIn 0.5s; }
|
| 493 |
+
.chat-row.user { justify-content: flex-end; }
|
| 494 |
+
.chat-row.ai { justify-content: flex-start; }
|
| 495 |
+
.chat-bubble { font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif; font-size: 1.08rem; line-height: 1.65; padding: 14px 22px; border-radius: 20px; min-width: 60px; max-width: 75vw; box-shadow: 0 2px 12px rgba(0,0,0,0.10); word-break: break-word; display: inline-block; position: relative; margin-bottom: 2px; }
|
| 496 |
+
.bubble-user { background: linear-gradient(90deg, #43e97b 0%, #38f9d7 100%); color: #fff; border-bottom-right-radius: 6px; border-top-right-radius: 22px; text-align: right; box-shadow: 0 4px 16px rgba(67,233,123,0.13); }
|
| 497 |
+
.bubble-ai { background: linear-gradient(90deg, #e3f0ff 0%, #c9eaff 100%); color: #1a237e; border-bottom-left-radius: 6px; border-top-left-radius: 22px; text-align: left; border: 1.5px solid #b3e0fc; box-shadow: 0 4px 16px rgba(44, 62, 80, 0.08); }
|
| 498 |
+
.bubble-unknown { background: #fffbe6; color: #8a6d3b; border-radius: 14px; text-align: center; border: 1px solid #ffe082; display: inline-block; }
|
| 499 |
+
.sender-label { font-size: 0.93em; font-weight: 600; opacity: 0.7; margin-bottom: 4px; display: block; }
|
| 500 |
+
.avatar { width: 38px; height: 38px; border-radius: 50%; margin-right: 10px; margin-top: 2px; background: #e0e0e0; object-fit: cover; box-shadow: 0 2px 6px rgba(0,0,0,0.07); }
|
| 501 |
+
@keyframes fadeIn { from { opacity: 0; transform: translateY(12px);} to { opacity: 1; transform: translateY(0);} }
|
| 502 |
+
</style>
|
| 503 |
+
""",
|
| 504 |
+
unsafe_allow_html=True,
|
| 505 |
+
)
|
| 506 |
+
|
| 507 |
+
job_fit = state.get("job_fit")
|
| 508 |
+
for msg in messages:
|
| 509 |
+
if isinstance(msg, HumanMessage):
|
| 510 |
+
st.markdown(
|
| 511 |
+
f"""
|
| 512 |
+
<div class="chat-row user">
|
| 513 |
+
<div class="chat-bubble bubble-user">
|
| 514 |
+
<span class="sender-label">🧑💻 You</span>
|
| 515 |
+
{msg.content}
|
| 516 |
+
</div>
|
| 517 |
+
</div>
|
| 518 |
+
""",
|
| 519 |
+
unsafe_allow_html=True,
|
| 520 |
+
)
|
| 521 |
+
elif isinstance(msg, AIMessage):
|
| 522 |
+
if not msg.content or not msg.content.strip():
|
| 523 |
+
continue
|
| 524 |
+
st.markdown(
|
| 525 |
+
f"""
|
| 526 |
+
<div class="chat-row ai">
|
| 527 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="AI"/>
|
| 528 |
+
<div class="chat-bubble bubble-ai">
|
| 529 |
+
<span class="sender-label">🤖 AI</span>
|
| 530 |
+
{msg.content}
|
| 531 |
+
</div>
|
| 532 |
+
</div>
|
| 533 |
+
""",
|
| 534 |
+
unsafe_allow_html=True,
|
| 535 |
+
)
|
| 536 |
+
elif isinstance(msg, ToolMessage):
|
| 537 |
+
raw_content = msg.content or "(no content)"
|
| 538 |
+
try:
|
| 539 |
+
parsed = json.loads(raw_content)
|
| 540 |
+
except Exception:
|
| 541 |
+
parsed = None
|
| 542 |
+
|
| 543 |
+
if parsed and isinstance(parsed, dict):
|
| 544 |
+
# --- Profile analysis format ---
|
| 545 |
+
if all(k in parsed for k in ("strengths", "weaknesses", "suggestions")):
|
| 546 |
+
strengths = parsed["strengths"]
|
| 547 |
+
weaknesses = parsed["weaknesses"]
|
| 548 |
+
suggestions = parsed["suggestions"]
|
| 549 |
+
|
| 550 |
+
formatted = (
|
| 551 |
+
"### 💪 **Strengths**\n"
|
| 552 |
+
f"- **Technical:** {', '.join(strengths.get('technical', []) or ['None'])}\n"
|
| 553 |
+
f"- **Projects:** {', '.join(strengths.get('projects', []) or ['None'])}\n"
|
| 554 |
+
f"- **Education:** {', '.join(strengths.get('education', []) or ['None'])}\n"
|
| 555 |
+
f"- **Soft Skills:** {', '.join(strengths.get('soft_skills', []) or ['None'])}\n\n"
|
| 556 |
+
"### ⚠️ **Weaknesses**\n"
|
| 557 |
+
f"- **Technical Gaps:** {', '.join(weaknesses.get('technical_gaps', []) or ['None'])}\n"
|
| 558 |
+
f"- **Project/Experience Gaps:** {', '.join(weaknesses.get('project_or_experience_gaps', []) or ['None'])}\n"
|
| 559 |
+
f"- **Missing Context:** {', '.join(weaknesses.get('missing_context', []) or ['None'])}\n\n"
|
| 560 |
+
"### 🛠 **Suggestions to improve**\n"
|
| 561 |
+
+ "\n".join(f"- {s}" for s in suggestions)
|
| 562 |
+
)
|
| 563 |
+
|
| 564 |
+
st.markdown(f"""
|
| 565 |
+
<div class="chat-row ai">
|
| 566 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
| 567 |
+
<div class="chat-bubble bubble-ai">
|
| 568 |
+
<span class="sender-label">📊 Profile Analysis</span>
|
| 569 |
+
{formatted}
|
| 570 |
+
</div>
|
| 571 |
+
</div>
|
| 572 |
+
""", unsafe_allow_html=True)
|
| 573 |
+
|
| 574 |
+
# --- Job fit format ---
|
| 575 |
+
elif "match_score" in parsed:
|
| 576 |
+
percent = parsed["match_score"]
|
| 577 |
+
suggestions = parsed.get("suggestions", [])
|
| 578 |
+
missing = parsed.get("missing_skills", [])
|
| 579 |
+
target_role = parsed.get('target_role', 'unspecified')
|
| 580 |
+
state["target_role"]=target_role
|
| 581 |
+
suggestions_html = "<br>".join(f"• {s}" for s in suggestions)
|
| 582 |
+
missing_html = "<br>".join(f"• {s}" for s in missing)
|
| 583 |
+
|
| 584 |
+
st.markdown(f"""
|
| 585 |
+
<div class="chat-row ai">
|
| 586 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
| 587 |
+
<div class="chat-bubble bubble-ai">
|
| 588 |
+
<span class="sender-label">📊 Job Fit</span>
|
| 589 |
+
<b>🎯 Target Role:</b> {target_role}<br>
|
| 590 |
+
<div style="
|
| 591 |
+
width: 120px; height: 120px; border-radius: 50%;
|
| 592 |
+
background: conic-gradient(#25D366 {percent * 3.6}deg, #e0e0e0 0deg);
|
| 593 |
+
display: flex; align-items: center; justify-content: center;
|
| 594 |
+
font-size: 1.8rem; color: #333; margin: 10px auto;">
|
| 595 |
+
{percent}%
|
| 596 |
+
</div>
|
| 597 |
+
<b>Missing Skills:</b><br>{missing_html}<br><br>
|
| 598 |
+
<b>Suggestions:</b><br>{suggestions_html}
|
| 599 |
+
</div>
|
| 600 |
+
</div>
|
| 601 |
+
""", unsafe_allow_html=True)
|
| 602 |
+
|
| 603 |
+
# --- Section text format ---
|
| 604 |
+
elif "result" in parsed:
|
| 605 |
+
text = parsed["result"]
|
| 606 |
+
st.markdown(f"""
|
| 607 |
+
<div class="chat-row ai">
|
| 608 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
| 609 |
+
<div class="chat-bubble bubble-ai">
|
| 610 |
+
<span class="sender-label">📄 Section Content</span>
|
| 611 |
+
{text}
|
| 612 |
+
</div>
|
| 613 |
+
</div>
|
| 614 |
+
""", unsafe_allow_html=True)
|
| 615 |
+
|
| 616 |
+
else:
|
| 617 |
+
st.markdown(
|
| 618 |
+
f"""
|
| 619 |
+
<div class="chat-row">
|
| 620 |
+
<div class="chat-bubble bubble-unknown">
|
| 621 |
+
<span class="sender-label">⚠️ Unknown</span>
|
| 622 |
+
{getattr(msg, 'content', str(msg))}
|
| 623 |
+
</div>
|
| 624 |
+
</div>
|
| 625 |
+
""",
|
| 626 |
+
unsafe_allow_html=True,
|
| 627 |
+
)
|
| 628 |
+
st.markdown('<div style="clear:both"></div>', unsafe_allow_html=True)
|
| 629 |
+
|
| 630 |
+
st.markdown("---")
|
| 631 |
+
|
| 632 |
+
user_input = st.chat_input(
    placeholder="Ask about your LinkedIn profile, e.g., 'Analyze my profile, how do I fit for AI role, how is my about section?'"
)

# On a new user turn: append it, validate, run one full graph step under
# the session's thread_id (so the checkpointer persists it), then rerun
# so the loop above re-renders the updated transcript.
if user_input and user_input.strip():
    state.setdefault("messages", []).append(HumanMessage(content=user_input.strip()))
    validate_state(state)
    thread_id = st.session_state.get("thread_id")
    config = {"configurable": {"thread_id": thread_id}}
    with st.spinner("Processing your request..."):
        st.session_state.state = app_graph.invoke(state, config)
    st.rerun()
|
chatbot_model.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any, Optional, Annotated
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
from langchain_core.messages import BaseMessage
|
| 4 |
+
from langgraph.graph import add_messages
|
| 5 |
+
class ChatbotState(BaseModel):
    # NOTE: the two methods below let code written against a plain dict
    # (e.g. state.get(...) / state.setdefault(...) in app.py) also work
    # when LangGraph hands back a ChatbotState model instance.
    def get(self, key, default=None):
        """
        Allow dict-like .get() access for compatibility.
        """
        # First try attribute directly
        if hasattr(self, key):
            return getattr(self, key)
        # Fallback: check if it's in __dict__
        return self.__dict__.get(key, default)

    def setdefault(self, key, default):
        """
        Dict-like setdefault: if attribute is None, set it to default and return it.
        Otherwise, return existing value.
        """
        if hasattr(self, key):
            value = getattr(self, key)
            if value is None:
                setattr(self, key, default)
                return default
            return value
        else:
            # attribute does not exist: set it
            setattr(self, key, default)
            return default

    profile: Dict[str, Any] = Field(..., description="Preprocessed / summarized profile data")
    profile_url: Optional[str] = Field(
        default=None,
        description="Original LinkedIn profile URL provided by the user."
    )

    # Quick access sections (about, headline, skills etc.)
    sections: Dict[str, str] = Field(..., description="Flattened profile sections for quick access")

    # Enhancements and analysis results
    enhanced_content: Dict[str, str] = Field(
        default_factory=dict,
        description=(
            "Map of improved or rewritten profile sections generated by the ContentGenerator tool. "
            "Keys are section names (e.g., 'about', 'headline'); values are enhanced text."
        )
    )

    profile_analysis: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Structured analysis of the user's profile produced by the ProfileAnalyzer tool, "
            "including strengths, weaknesses, and actionable suggestions."
        )
    )

    job_fit: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Assessment result from the JobMatcher tool, detailing how well the user's profile matches "
            "the target role, including missing skills and match score."
        )
    )

    target_role: Optional[str] = Field(
        None,
        description=(
            "Target job role the user is aiming for. "
            "Can be set by the user directly during the conversation or inferred by the chatbot."
        )
    )

    editing_section: Optional[str] = Field(
        None,
        description=(
            "Name of the profile section currently being edited or improved, "
            "set dynamically when the ContentGenerator tool is invoked."
        )
    )
    next_tool_name: Optional[str] = Field(
        default=None,
        description="Name of the next tool the chatbot wants to call, set dynamically after LLM response."
    )


    # Annotated chat history directly using BaseMessage.
    # add_messages is LangGraph's reducer: node outputs are appended/merged
    # into this list rather than replacing it.
    messages: Annotated[List[BaseMessage], add_messages] = Field(
        default_factory=list,
        description="List of user and assistant messages"
    )
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class ProfileAnalysisStrengths(BaseModel):
    """Strength categories returned by the profile_analyzer tool's LLM call."""
    technical: List[str]
    projects: List[str]
    education: List[str]
    soft_skills: List[str]
|
| 98 |
+
|
| 99 |
+
class ProfileAnalysisWeaknesses(BaseModel):
    """Weakness categories returned by the profile_analyzer tool's LLM call."""
    technical_gaps: List[str]
    project_or_experience_gaps: List[str]
    missing_context: List[str]
|
| 103 |
+
|
| 104 |
+
class ProfileAnalysisModel(BaseModel):
    """Top-level schema the profile-analysis prompt asks the LLM to emit."""
    strengths: ProfileAnalysisStrengths
    weaknesses: ProfileAnalysisWeaknesses
    suggestions: List[str]
|
| 108 |
+
|
| 109 |
+
class JobFitModel(BaseModel):
    """Schema the job-fit prompt asks the LLM to emit."""
    # Percentage fit, validated to the 0-100 range.
    match_score: int = Field(..., ge=0, le=100)
    missing_skills: List[str]
    suggestions: List[str]
|
| 113 |
+
|
| 114 |
+
class ContentGenerationModel(BaseModel):
    """Schema for rewritten-section output from the ContentGenerator tool."""
    new_content: str
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# ========== 6. MEMORY SETUP ==========
|
| 119 |
+
|
| 120 |
+
class UserMemory:
    """In-process, append-only scratch memory for a single user session.

    Holds the scraped profile, the roles the user has targeted, and an
    ordered log of (key, value) events saved by the tools.
    """

    def __init__(self):
        self.profile = None
        self.target_roles = []
        self.history = []

    def save(self, key, value):
        """Append a (key, value) event; earlier entries are never overwritten."""
        self.history.append((key, value))

    def get_history(self):
        """Return every recorded (key, value) event in insertion order."""
        return self.history
|
llm_utils.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
from typing import Type, Union, Dict, Any
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
import dirtyjson
|
| 5 |
+
import re
|
| 6 |
+
# Make sure you install dirtyjson: pip install dirtyjson
|
| 7 |
+
|
| 8 |
+
# === Optionally, import your Groq client from where you configure it ===
|
| 9 |
+
|
| 10 |
+
# === Helper function ===
|
| 11 |
+
|
| 12 |
+
def call_llm_and_parse(
    groq_client,
    prompt: str,
    model: Type[BaseModel],
    max_retries: int = 3,
    delay: float = 1.0
) -> Union[BaseModel, Dict[str, Any]]:
    """
    Call LLM with a prompt, parse the JSON response, and validate it using a Pydantic model.

    Args:
        groq_client: OpenAI-compatible client exposing ``chat.completions.create``.
        prompt (str): The prompt to send to the LLM.
        model (Type[BaseModel]): The Pydantic model to validate against.
        max_retries (int, optional): Number of retries on failure. Default is 3.
        delay (float, optional): Delay (in seconds) between retries, multiplied by attempt count.

    Returns:
        BaseModel: Validated Pydantic model instance if successful.
        dict: Contains 'error' and 'raw' fields if validation fails after retries.
    """
    # Pre-bind so the final error path cannot raise NameError when the API
    # call itself fails before either variable is assigned (bug fix: the
    # original guarded json_str via locals() but not response_text).
    response_text = None
    json_str = None

    for attempt in range(1, max_retries + 1):
        try:
            print(f"[call_llm_and_parse] Attempt {attempt}: sending prompt to LLM...")

            completion = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=800
            )

            response_text = completion.choices[0].message.content
            print(f"[call_llm_and_parse] Raw LLM response: {response_text[:200]}...")  # first 200 chars

            # Extract JSON (handle dirty or partial JSON)
            json_str = extract_and_repair_json(response_text)

            # Parse leniently with dirtyjson, then validate strictly with Pydantic.
            parsed = dirtyjson.loads(json_str)
            validated = model.model_validate(parsed)

            print("[call_llm_and_parse] Successfully parsed and validated.")
            return validated

        except Exception as e:
            print(f"[Retry {attempt}] Error: {e}")
            if attempt < max_retries:
                time.sleep(delay * attempt)  # linear backoff between attempts
            else:
                print("[call_llm_and_parse] Failed after retries.")
                return {
                    "error": f"Validation failed after {max_retries} retries: {e}",
                    # Prefer the extracted JSON; fall back to the raw response
                    # (None if the request never succeeded).
                    "raw": json_str if json_str is not None else response_text
                }
| 69 |
+
|
| 70 |
+
def extract_and_repair_json(text: str) -> str:
    """
    Extract a JSON object from *text* and repair unbalanced braces.

    Everything from the first '{' through the end of the string is taken as
    the candidate payload; if it has more opening than closing braces, the
    missing '}' characters are appended.

    Raises:
        ValueError: If *text* contains no '{' at all.
    """
    start = text.find('{')
    if start == -1:
        raise ValueError("No JSON object found.")
    candidate = text[start:]
    # Append any closing braces the model left off.
    deficit = candidate.count('{') - candidate.count('}')
    if deficit > 0:
        candidate += '}' * deficit
    return candidate
|
profile_preprocessing.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any
|
| 2 |
+
from urllib.parse import urlparse
|
| 3 |
+
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
|
| 4 |
+
def normalize_url(url):
    """Trim surrounding whitespace, then any trailing slashes, from *url*."""
    cleaned = url.strip()
    while cleaned.endswith('/'):
        cleaned = cleaned[:-1]
    return cleaned
| 7 |
+
def summarize_skills(skills: List[Dict]) -> str:
    """Join non-empty skill titles into one comma-separated string."""
    titles = (skill.get('title', '') for skill in skills)
    return ', '.join(title for title in titles if title)
+
|
| 10 |
+
def summarize_projects(projects: List[Dict]) -> str:
    """Build a newline-separated "title: description" line per project.

    Description text is gathered from every 'textComponent' entry nested
    under each project's subComponents.
    """
    lines = []
    for project in projects:
        name = project.get('title', '')
        fragments = []
        for component in project.get('subComponents') or []:
            for entry in component.get('description', []):
                if entry.get('type') == 'textComponent':
                    fragments.append(entry.get('text', '') + ' ')
        lines.append(f"{name}: {''.join(fragments).strip()}")
    return '\n'.join(lines)
+
|
| 23 |
+
def summarize_educations(educations: List[Dict]) -> str:
    """Summarize each titled education entry as 'title (subtitle, caption)'."""
    parts = []
    for edu in educations:
        if edu.get('title'):
            parts.append(
                f"{edu.get('title', '')} ({edu.get('subtitle', '')}, {edu.get('caption', '')})"
            )
    return ', '.join(parts)
+
|
| 29 |
+
def summarize_certs(certs: List[Dict]) -> str:
    """Summarize each titled certification as 'title (subtitle, caption)'."""
    formatted = [
        f"{cert.get('title', '')} ({cert.get('subtitle', '')}, {cert.get('caption', '')})"
        for cert in certs
        if cert.get('title')
    ]
    return ', '.join(formatted)
+
|
| 35 |
+
def summarize_test_scores(scores: List[Dict]) -> str:
    """Summarize each titled test score as 'title (subtitle)'."""
    entries = []
    for score in scores:
        if score.get('title'):
            entries.append(f"{score.get('title', '')} ({score.get('subtitle', '')})")
    return ', '.join(entries)
+
|
| 41 |
+
def summarize_generic(items: List[Dict], key='title') -> str:
    """Join the non-empty *key* values of *items* into a comma-separated string."""
    values = [item.get(key, '') for item in items]
    return ', '.join(value for value in values if value)
+
|
| 44 |
+
|
| 45 |
+
# === Preprocess raw profile into summarized profile ===
|
| 46 |
+
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """Flatten a raw scraped LinkedIn profile into a dict of summary strings.

    Scalar fields are copied through with '' defaults; list-valued sections
    are collapsed to short strings via the summarize_* helpers above.
    """
    summary = {
        "FullName": raw_profile.get("fullName", ""),
        "profile_url": raw_profile.get("linkedinUrl", ""),
        "Headline": raw_profile.get("headline", ""),
        "JobTitle": raw_profile.get("jobTitle", ""),
        "CompanyName": raw_profile.get("companyName", ""),
        "CompanyIndustry": raw_profile.get("companyIndustry", ""),
        # Coerced to str: the scraper may return a number here.
        "CurrentJobDuration": str(raw_profile.get("currentJobDuration", "")),
        "About": raw_profile.get("about", ""),
        "Experiences": summarize_generic(raw_profile.get("experiences", []), key='title'),
        "Skills": summarize_skills(raw_profile.get("skills", [])),
        "Educations": summarize_educations(raw_profile.get("educations", [])),
        "Certifications": summarize_certs(raw_profile.get("licenseAndCertificates", [])),
        "HonorsAndAwards": summarize_generic(raw_profile.get("honorsAndAwards", []), key='title'),
        "Verifications": summarize_generic(raw_profile.get("verifications", []), key='title'),
        "Highlights": summarize_generic(raw_profile.get("highlights", []), key='title'),
        "Projects": summarize_projects(raw_profile.get("projects", [])),
        "Publications": summarize_generic(raw_profile.get("publications", []), key='title'),
        "Patents": summarize_generic(raw_profile.get("patents", []), key='title'),
        "Courses": summarize_generic(raw_profile.get("courses", []), key='title'),
        "TestScores": summarize_test_scores(raw_profile.get("testScores", [])),
    }
    return summary
| 69 |
+
|
| 70 |
+
# === Create & fill state ===
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str, Any]:
    """
    Initializes the chatbot state used in LangGraph:
    - Keeps the processed profile
    - Splits important sections for quick access
    - Initializes placeholders for tool outputs
    - Adds empty chat history for conversation context
    """
    # Clean / normalize the scraped profile first.
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    # State section name -> key in the processed profile. Every value is
    # coerced to a string ('' instead of None) so downstream tools never
    # have to guard against None.
    section_sources = {
        "about": "About",
        "headline": "Headline",
        "skills": "Skills",
        "projects": "Projects",
        "educations": "Educations",
        "certifications": "Certifications",
        "honors_and_awards": "HonorsAndAwards",
        "experiences": "Experiences",
        "publications": "Publications",
        "patents": "Patents",
        "courses": "Courses",
        "test_scores": "TestScores",
        "verifications": "Verifications",
        "highlights": "Highlights",
        "job_title": "JobTitle",
        "company_name": "CompanyName",
        "company_industry": "CompanyIndustry",
        "current_job_duration": "CurrentJobDuration",
        "full_name": "FullName",
    }
    sections = {
        name: (profile.get(source, "") or "")
        for name, source in section_sources.items()
    }

    state: Dict[str, Any] = {
        "profile": profile,  # Cleaned & normalized profile
        "profile_url": normalize_url(profile.get("profile_url", "") or ""),
        "sections": sections,

        # === Placeholders populated by tools ===
        "enhanced_content": {},    # Populated by ContentGenerator tool
        "profile_analysis": None,  # Can be None initially (Optional)
        "job_fit": None,           # Can be None initially (Optional)
        "target_role": None,       # Optional[str]
        "editing_section": None,   # Optional[str]

        # === Chat history ===
        # Expected shape: list of dicts like {"role": "user", "content": "..."}
        "messages": [],
        "next_tool_name": None,
    }

    return state
|
| 127 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core
python-dotenv
streamlit
pydantic
typing-extensions
tqdm

# LangChain ecosystem
langchain
langchain-core
langchain-openai
langgraph
langgraph-checkpoint

# For OpenAI-compatible LLMs (Groq, etc.)
openai

# LinkedIn scraping
apify-client

# For parsing "dirty" JSON
dirtyjson
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
scraping_profile.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from apify_client import ApifyClient
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
# Read APIFY_API_TOKEN (and any other settings) from a local .env file.
load_dotenv()

# Apify API token used to authenticate the shared client below.
api_token = os.getenv("APIFY_API_TOKEN")

# Single module-level Apify client, reused by every scrape call.
client = ApifyClient(api_token)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def scrape_linkedin_profile(profile_url: str) -> dict:
    """
    📄 Scrapes a LinkedIn profile using Apify and returns the data as a Python dict.

    Side effect: writes the first scraped item to 'scraped_profile.json'
    for offline inspection.

    Returns:
        dict: The first dataset item on success; {} if the dataset is empty
        or any step fails (deliberate best-effort behavior).
    """
    try:
        run_input = {"profileUrls": [profile_url]}
        run = client.actor("dev_fusion/Linkedin-Profile-Scraper").call(run_input=run_input)

        items = list(client.dataset(run["defaultDatasetId"]).iterate_items())

        if items:
            # Explicit UTF-8 avoids platform-dependent default encodings.
            with open("scraped_profile.json", "w", encoding="utf-8") as f:
                json.dump(items[0], f, indent=2)
            return items[0]
        else:
            print("⚠️ No data found in dataset.")
            return {}
    except Exception as e:
        # Broad on purpose: callers expect {} rather than an exception.
        print(f"❌ Error during scraping: {e}")
        return {}
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# 🧪 OPTIONAL: test code only runs when this file is executed directly
if __name__ == "__main__":
    sample_url = "https://www.linkedin.com/in/sri-vallabh-tammireddy/"
    print(json.dumps(scrape_linkedin_profile(sample_url), indent=2))
|