tsrivallabh committed on
Commit
5318b09
·
verified ·
1 Parent(s): c73d998

Upload 10 files

Files changed (10)
  1. .gitattributes +3 -35
  2. .gitignore +1 -0
  3. Dockerfile +28 -0
  4. README.md +406 -0
  5. app.py +643 -0
  6. chatbot_model.py +130 -0
  7. llm_utils.py +83 -0
  8. profile_preprocessing.py +127 -0
  9. requirements.txt +31 -0
  10. scraping_profile.py +42 -0
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
3
+ # *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,28 @@
1
+
2
+
3
+ # Use an official Python runtime as a parent image
4
+ FROM python:3.11-slim
5
+
6
+
7
+ # Set the working directory in the container
8
+ WORKDIR /app
9
+
10
+ ENV HF_HOME=/data/hf_cache
11
+ ENV TRANSFORMERS_CACHE=/data/hf_cache/transformers
12
+ ENV HF_DATASETS_CACHE=/data/hf_cache/datasets
13
+ ENV HF_HUB_CACHE=/data/hf_cache/hub
14
+
15
+ RUN mkdir -p /data/hf_cache/transformers /data/hf_cache/datasets /data/hf_cache/hub && chmod -R 777 /data/hf_cache
16
+
17
+ # Copy requirements.txt and install dependencies
18
+ COPY requirements.txt .
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ # Copy the rest of your app's code
22
+ COPY . .
23
+
24
+ # Expose the port Streamlit runs on
25
+ EXPOSE 8501
26
+
27
+ # Run Streamlit
28
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md ADDED
@@ -0,0 +1,406 @@
1
+ ---
2
+ title: Linkedin Assistant
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: docker
7
+ app_port: 8501
8
+ tags:
9
+ - streamlit
10
+ pinned: false
11
+ short_description: LinkedIn profile AI career assistant
12
+ license: mit
13
+ ---
14
+
15
+
16
+ # 🤖 LinkedIn AI Career Assistant
17
+
18
+ [![Hugging Face Spaces](https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue)](https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/)
19
+ [![Python](https://img.shields.io/badge/Python-3.8+-blue.svg)](https://www.python.org/downloads/)
20
+ [![Streamlit](https://img.shields.io/badge/Streamlit-1.28+-red.svg)](https://streamlit.io/)
21
+ [![LangGraph](https://img.shields.io/badge/LangGraph-latest-green.svg)](https://langchain-ai.github.io/langgraph/)
22
+ [![Groq](https://img.shields.io/badge/Groq-llama3--8b--8192-orange.svg)](https://groq.com/)
23
+
24
+ An intelligent AI-powered career assistant that analyzes LinkedIn profiles, provides job fit analysis, and offers personalized career guidance through an interactive chat interface powered by Groq's llama3-8b-8192 model.
25
+
26
+ ## 🚀 **Live Demo**
27
+
28
+ Try the application live at: **https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/**
29
+
30
+ ## 📋 **Table of Contents**
31
+
32
+ - [Overview](#overview)
33
+ - [Key Features](#key-features)
34
+ - [Architecture](#architecture)
35
+ - [Installation](#installation)
36
+ - [Usage](#usage)
37
+ - [Technical Implementation](#technical-implementation)
38
+ - [API Keys Setup](#api-keys-setup)
39
+ - [Session Management](#session-management)
40
+ - [Contributing](#contributing)
41
+ - [License](#license)
42
+
43
+ ## 🎯 **Overview**
44
+
45
+ The LinkedIn AI Career Assistant is a sophisticated career optimization tool that combines Groq's powerful llama3-8b-8192 model with LangGraph's multi-agent framework to provide comprehensive LinkedIn profile analysis. Built using **Streamlit**, **LangGraph**, and **Groq API**, this application offers an interactive chat-based experience for professional career development.
46
+
47
+ ### **What Makes This Special?**
48
+
49
+ - **🧠 Multi-Agent AI System**: Utilizes LangGraph to orchestrate specialized AI tools for different analysis tasks
50
+ - **💾 Thread-Based Sessions**: Maintains conversation context with intelligent thread management based on LinkedIn URLs
51
+ - **🎯 Job Fit Analysis**: Provides detailed match scores and improvement suggestions for target roles
52
+ - **📊 Profile Analysis**: Comprehensive strengths and weaknesses assessment
53
+ - **🔄 Real-time Scraping**: Fetches live LinkedIn profile data using Apify integration
54
+ - **⚡ Groq-Powered**: Lightning-fast responses using Groq's optimized llama3-8b-8192 model
55
+
56
+ ## 🌟 **Key Features**
57
+
58
+ ### 1. **Interactive Chat Interface**
59
+ - **LinkedIn URL Input**: Simply paste your LinkedIn profile URL to get started
60
+ - **Conversational AI**: Natural language interaction for profile optimization
61
+ - **Real-time Analysis**: Instant feedback and suggestions as you chat
62
+ - **Custom Styling**: Modern chat bubble interface with professional design
63
+
64
+ ### 2. **Comprehensive Profile Analysis**
65
+ - **Strengths Identification**: Highlights technical skills, projects, education, and soft skills
66
+ - **Weakness Detection**: Identifies gaps in technical skills, experience, and missing context
67
+ - **Actionable Suggestions**: Provides specific recommendations for profile enhancement
68
+ - **Section-by-Section Access**: Detailed extraction of individual LinkedIn profile sections
69
+
70
+ ### 3. **Advanced Job Fit Analysis**
71
+ - **Match Score Calculation**: Quantifies how well your profile fits target roles (0-100%)
72
+ - **Skill Gap Analysis**: Identifies missing skills required for your target position
73
+ - **Role-Specific Feedback**: Tailored suggestions for improving job compatibility
74
+ - **Visual Score Display**: Circular progress indicators for match percentages
75
+
76
+ ### 4. **Intelligent Session Management**
77
+ - **URL-Based Threading**: Automatically finds existing conversations for the same LinkedIn profile
78
+ - **Session Continuity**: Choose to continue previous chats or start fresh
79
+ - **SQLite Persistence**: Robust conversation storage with automatic checkpointing
80
+ - **Thread Isolation**: Secure separation of different user sessions
81
+
82
+ ### 5. **Professional Data Handling**
83
+ - **Pydantic Validation**: Robust data validation using structured schemas
84
+ - **State Management**: Comprehensive state tracking across conversation flows
85
+ - **Error Handling**: Graceful handling of API failures and data parsing issues
86
+ - **Memory Optimization**: Efficient storage and retrieval of conversation context
87
+
88
+ ## 🏗️ **Architecture**
89
+
90
+ ### **Multi-Agent System Design**
91
+
92
+ ```
93
+ ┌─────────────────────────────────────────────────────────────┐
94
+ │ User Interface (Streamlit) │
95
+ │ Custom Chat Interface │
96
+ └─────────────────────┬───────────────────────────────────────┘
97
+
98
+ ┌─────────────────────┴───────────────────────────────────────┐
99
+ │ LangGraph Orchestrator │
100
+ │ (ChatbotState Schema) │
101
+ │ ┌─────────────────┬─────────────────┬─────────────────┐ │
102
+ │ │ Chatbot Node │ Profile Tool │ Job Match Tool │ │
103
+ │ │ (Router) │ (Analyzer) │ (Matcher) │ │
104
+ │ │ │ │ │ │
105
+ │ │ Extract Tool │ │ │ │
106
+ │ │ (Section Data) │ │ │ │
107
+ │ └─────────────────┴─────────────────┴─────────────────┘ │
108
+ └─────────────────────┬───────────────────────────────────────┘
109
+
110
+ ┌─────────────────────┴───────────────────────────────────────┐
111
+ │ External Services │
112
+ │ ┌─────────────────┬─────────────────┬─────────────────┐ │
113
+ │ │ Apify LinkedIn │ Groq API │ SQLite │ │
114
+ │ │ Scraper │ (llama3-8b-8192)│ Checkpointer │ │
115
+ │ └─────────────────┴─────────────────┴─────────────────┘ │
116
+ └─────────────────────────────────────────────────────────────┘
117
+ ```
118
+
119
+ ### **Core Components**
120
+
121
+ 1. **ChatBot Node**: Main conversation router with tool calling capabilities
122
+ 2. **Profile Analyzer**: Comprehensive profile evaluation for strengths and weaknesses
123
+ 3. **Job Matcher**: Role compatibility analysis with scoring and suggestions
124
+ 4. **Extract Tool**: Granular access to specific profile sections
125
+ 5. **State Management**: Pydantic-based ChatbotState with comprehensive field tracking
126
+ 6. **Thread System**: URL-based session identification and management
127
+
128
+ ## 🛠️ **Installation**
129
+
130
+ ### **Prerequisites**
131
+
132
+ - Python 3.8 or higher
133
+ - pip package manager
134
+ - Groq API key
135
+ - Apify API token
136
+
137
+ ### **Quick Start**
138
+
139
+ 1. **Clone the Repository**
140
+ ```bash
141
+ git clone https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant.git
142
+ cd Linkedin-Profile-AI-Assistant
143
+ ```
144
+
145
+ 2. **Install Dependencies**
146
+ ```bash
147
+ pip install -r requirements.txt
148
+ ```
149
+
150
+ 3. **Set Up Environment Variables**
151
+ ```bash
152
+ cp .env.example .env
153
+ # Edit .env with your API keys
154
+ ```
155
+
156
+ 4. **Run the Application**
157
+ ```bash
158
+ streamlit run app.py
159
+ ```
160
+
161
+ 5. **Access the Application**
162
+ ```
163
+ Open your browser and go to: http://localhost:8501
164
+ ```
165
+
166
+ ### **Requirements**
167
+
168
+ ```txt
169
+ streamlit>=1.28.0
170
+ langchain>=0.0.350
171
+ langchain-openai>=0.0.8
172
+ langgraph>=0.0.55
173
+ openai>=1.3.0
174
+ pydantic>=2.0.0
175
+ python-dotenv>=1.0.0
176
+ apify-client>=1.0.0
177
+ dirtyjson>=1.0.8
178
+ ```
179
+
180
+ ## 📖 **Usage**
181
+
182
+ ### **Getting Started**
183
+
184
+ 1. **Launch the Application**
185
+ - Open the application in your browser
186
+ - You'll see the main interface with a LinkedIn URL input field
187
+
188
+ 2. **Enter Your LinkedIn Profile**
189
+ - Paste your LinkedIn profile URL (e.g., `https://www.linkedin.com/in/your-profile/`)
190
+ - The system will automatically scrape and analyze your profile
191
+
192
+ 3. **Choose Session Mode**
193
+ - If a previous session exists, choose to continue or start fresh
194
+ - New sessions initialize with full profile preprocessing
195
+
196
+ 4. **Start Chatting**
197
+ - Begin conversations with queries like:
198
+ - "Analyze my profile strengths and weaknesses"
199
+ - "I want to apply for a Data Scientist role"
200
+ - "Show me my about section"
201
+ - "What skills am I missing for a Software Engineer position?"
202
+
203
+ ### **Available Commands**
204
+
205
+ - **Profile Analysis**: "Analyze my profile" - Full strengths/weaknesses analysis
206
+ - **Job Matching**: "I want to apply for [role]" - Match score and skill gaps
207
+ - **Section Access**: "Show me my [section]" - Extract specific profile sections
208
+ - **General Queries**: Ask any career-related questions for guidance
209
+
210
+ ### **Sample Conversations**
211
+
212
+ ```
213
+ User: "Analyze my LinkedIn profile"
214
+ AI: ✅ Profile analysis complete!
215
+
216
+ 💪 Strengths
217
+ - Technical: Python, Machine Learning, Data Analysis
218
+ - Projects: E-commerce recommendation system, Stock prediction model
219
+ - Education: Computer Science degree, Data Science certification
220
+ - Soft Skills: Problem-solving, Team collaboration
221
+
222
+ ⚠️ Weaknesses
223
+ - Technical Gaps: Cloud computing platforms, MLOps tools
224
+ - Project/Experience Gaps: Limited production deployment experience
225
+ - Missing Context: Quantified project impacts and metrics
226
+
227
+ 🛠 Suggestions to improve
228
+ - Add AWS/Azure cloud certifications
229
+ - Include specific metrics for project outcomes
230
+ - Highlight leadership or mentoring experiences
231
+ ```
232
+
233
+ ```
234
+ User: "I want to apply for a Senior Data Scientist role"
235
+ AI: 📊 Job Fit Analysis
236
+
237
+ 🎯 Target Role: Senior Data Scientist
238
+ Match Score: 78%
239
+
240
+ Missing Skills:
241
+ • Deep Learning frameworks (TensorFlow, PyTorch)
242
+ • MLOps and model deployment
243
+ • Leadership and team management experience
244
+
245
+ Suggestions:
246
+ • Complete online courses in deep learning
247
+ • Build projects showcasing end-to-end ML pipelines
248
+ • Seek opportunities to lead junior team members
249
+ ```
250
+
251
+ ## 🔧 **Technical Implementation**
252
+
253
+ ### **State Management**
254
+
255
+ The application uses a sophisticated Pydantic-based state management system:
256
+
257
+ ```python
258
+ class ChatbotState(BaseModel):
259
+ profile: Dict[str, Any] # Processed LinkedIn profile data
260
+ profile_url: Optional[str] # Original LinkedIn URL
261
+ sections: Dict[str, str] # Individual profile sections
262
+ enhanced_content: Dict[str, str] # Future AI-generated improvements
263
+ profile_analysis: Optional[Dict[str, Any]] # Strengths/weaknesses
264
+ job_fit: Optional[Dict[str, Any]] # Job matching results
265
+ target_role: Optional[str] # User's target job role
266
+ messages: Annotated[List[BaseMessage], add_messages] # Chat history
267
+ next_tool_name: Optional[str] # Tool routing information
268
+ ```
269
+
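+ For illustration, a state can be constructed and validated the same way `validate_state` does in `app.py` (a minimal sketch; the profile values here are made up):
+
+ ```python
+ from chatbot_model import ChatbotState
+
+ # 'profile' and 'sections' are the only required fields; everything else has defaults.
+ state = ChatbotState.model_validate({
+     "profile": {"FullName": "Jane Doe", "Skills": "Python, SQL"},
+     "sections": {"headline": "Data Analyst", "about": ""},
+ })
+ print(state.sections["headline"])  # -> Data Analyst
+ ```
+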
270
+ ### **Tool Integration**
271
+
272
+ The system includes three specialized tools:
273
+
274
+ 1. **Profile Analyzer Tool**:
275
+ - Comprehensive profile evaluation
276
+ - Structured output with strengths, weaknesses, suggestions
277
+ - Uses ProfileAnalysisModel for validation
278
+
279
+ 2. **Job Matcher Tool**:
280
+ - Role-specific compatibility analysis
281
+ - Calculates match scores (0-100%)
282
+ - Identifies missing skills and provides suggestions
283
+
284
+ 3. **Extract Tool**:
285
+ - Granular access to profile sections
286
+ - Supports nested data extraction with dot notation (see the sketch after this list)
287
+ - Returns structured results for specific queries
288
+
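+ The dot-notation lookup behind the Extract Tool is a plain key walk over the state, condensed here from `extract_from_state_tool` in `app.py`:
+
+ ```python
+ def extract_by_path(state, key: str) -> dict:
+     """Walk a dotted key such as 'sections.about' through dicts or model attributes."""
+     value = state
+     for part in key.split("."):
+         if isinstance(value, dict):
+             value = value.get(part)
+         elif hasattr(value, part):
+             value = getattr(value, part)
+         else:
+             value = None
+         if value is None:
+             break
+     return {"result": value}
+ ```
+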
289
+ ### **Session Architecture**
290
+
291
+ - **Thread Management**: URL-based thread identification for session continuity
292
+ - **Checkpointing**: SQLite-based persistent storage with automatic fallback (sketched after this list)
293
+ - **State Validation**: Comprehensive Pydantic validation for data integrity
294
+ - **Memory Optimization**: Efficient message history management
295
+
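+ The automatic fallback mentioned above mirrors the checkpointer setup in `app.py`:
+
+ ```python
+ import sqlite3
+ from langgraph.checkpoint.memory import MemorySaver
+
+ # Prefer the SQLite checkpointer when it is importable; otherwise fall back to
+ # an in-memory saver (conversation state is then lost on restart).
+ try:
+     from langgraph.checkpoint.sqlite import SqliteSaver
+     checkpointer = SqliteSaver(sqlite3.connect("checkpoints1.db", check_same_thread=False))
+ except ImportError:
+     checkpointer = MemorySaver()
+ ```
+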
296
+ ### **LLM Integration**
297
+
298
+ - **Model**: Groq's llama3-8b-8192 for fast, high-quality responses
299
+ - **API**: OpenAI-compatible interface through Groq
300
+ - **Tool Calling**: Native support for structured tool invocation
301
+ - **Error Handling**: Robust retry mechanisms and graceful degradation
302
+
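+ Both the raw client in `llm_utils.py` and the LangChain wrapper in `app.py` reach Groq through its OpenAI-compatible endpoint; in essence (the prompt below is only a placeholder):
+
+ ```python
+ import os
+ from openai import OpenAI
+
+ # Groq exposes an OpenAI-compatible API, so the standard OpenAI client works
+ # once it is pointed at Groq's base URL.
+ groq_client = OpenAI(
+     api_key=os.getenv("GROQ_API_KEY"),
+     base_url="https://api.groq.com/openai/v1",
+ )
+ completion = groq_client.chat.completions.create(
+     model="llama3-8b-8192",
+     messages=[{"role": "user", "content": "Say hello in one short sentence."}],
+     temperature=0.3,
+ )
+ print(completion.choices[0].message.content)
+ ```
+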
303
+ ## 🔑 **API Keys Setup**
304
+
305
+ Create a `.env` file in the root directory:
306
+
307
+ ```env
308
+ # Groq API Key (required)
309
+ GROQ_API_KEY=your_groq_api_key_here
310
+
311
+ # Apify API Token (required for LinkedIn scraping)
312
+ APIFY_API_TOKEN=your_apify_token_here
313
+ ```
314
+
315
+ ### **Getting API Keys**
316
+
317
+ 1. **Groq API Key**:
318
+ - Visit [Groq Console](https://console.groq.com/)
319
+ - Create an account and generate an API key
320
+ - Used for llama3-8b-8192 model inference
321
+
322
+ 2. **Apify API Token**:
323
+ - Go to [Apify Console](https://console.apify.com/)
324
+ - Sign up and get your API token
325
+ - Used for LinkedIn profile scraping
326
+
327
+ ## 💾 **Session Management**
328
+
329
+ The application implements intelligent session management:
330
+
331
+ ### **Thread-Based System**
332
+ - Each LinkedIn profile URL gets a unique thread ID
333
+ - Automatic detection of existing conversations for the same profile
334
+ - Secure isolation between different user sessions
335
+
336
+ ### **Conversation Persistence**
337
+ - SQLite-based storage for production environments
338
+ - Memory-based fallback for development/testing
339
+ - Automatic checkpointing after each interaction
340
+ - Recovery capability in case of interruptions
341
+
342
+ ### **User Experience**
343
+ - Choice to continue previous conversations or start fresh
344
+ - Seamless transition between sessions
345
+ - Maintained conversation context across browser refreshes
346
+
347
+ ## 🤝 **Contributing**
348
+
349
+ We welcome contributions to improve the LinkedIn AI Career Assistant! Here's how you can help:
350
+
351
+ ### **Development Setup**
352
+
353
+ 1. Fork the repository
354
+ 2. Create a feature branch: `git checkout -b feature/your-feature-name`
355
+ 3. Make your changes and test thoroughly
356
+ 4. Submit a pull request with a clear description
357
+
358
+ ### **Areas for Contribution**
359
+
360
+ - **Tool Enhancement**: Implement the commented-out content_generator tool
361
+ - **UI/UX Improvements**: Enhance the Streamlit interface design
362
+ - **Performance Optimization**: Improve response times and resource usage
363
+ - **Testing**: Add comprehensive test coverage
364
+ - **Documentation**: Expand examples and API documentation
365
+
366
+ ### **Code Style**
367
+
368
+ - Follow PEP 8 guidelines for Python code
369
+ - Use meaningful variable and function names
370
+ - Add docstrings for all functions and classes
371
+ - Include type hints where appropriate
372
+ - Validate data models with Pydantic
373
+
374
+ ## 📝 **License**
375
+
376
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
377
+
378
+ ## 🙏 **Acknowledgments**
379
+
380
+ - **Groq** for providing fast and efficient LLM inference
381
+ - **LangChain/LangGraph** for the multi-agent framework
382
+ - **Streamlit** for the web application framework
383
+ - **Apify** for LinkedIn scraping capabilities
384
+ - **Hugging Face** for hosting the live demo
385
+
386
+ ## 📞 **Support**
387
+
388
+ For questions, issues, or suggestions:
389
+
390
+ - **Create an Issue**: [GitHub Issues](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/issues)
391
+ - **Discussions**: [GitHub Discussions](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/discussions)
392
+ - **Email**: [email protected]
393
+
394
+ ## 🔄 **Recent Updates**
395
+
396
+ - **v2.0**: Migrated to Groq API for faster inference
397
+ - **Thread Management**: Implemented URL-based session tracking
398
+ - **Enhanced UI**: Custom chat interface with professional styling
399
+ - **Robust State**: Pydantic-based data validation and error handling
400
+ - **Tool Optimization**: Streamlined to three core analysis tools
401
+
402
+ ---
403
+
404
+ **Built with ❤️ by Sri Vallabh**
405
+
406
+ *Empowering professionals to optimize their LinkedIn presence and advance their careers through AI-powered insights.*
app.py ADDED
@@ -0,0 +1,643 @@
1
+ import os
2
+ import json
3
+ import re
4
+ import time
5
+ from typing import Dict, Any, List, Optional, Annotated
6
+ from chatbot_model import (
7
+ UserMemory,
8
+ ChatbotState,
9
+ ProfileAnalysisModel,
10
+ JobFitModel,
11
+ ContentGenerationModel,
12
+
13
+ )
14
+ from llm_utils import call_llm_and_parse
15
+ from profile_preprocessing import (
16
+ preprocess_profile,
17
+ initialize_state,
18
+ normalize_url
19
+ )
20
+ from openai import OpenAI
21
+ import streamlit as st
22
+ import hashlib
23
+ from dotenv import load_dotenv
24
+ from pydantic import BaseModel, Field, ValidationError
25
+ # import pdb; pdb.set_trace()
26
+ from scraping_profile import scrape_linkedin_profile
27
+ from langchain_openai import ChatOpenAI
28
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, BaseMessage, ToolMessage
29
+ from langchain_core.tools import tool
30
+ from langgraph.graph import StateGraph, END, START
31
+ from langgraph.checkpoint.memory import MemorySaver
32
+ from langgraph.graph import add_messages # if your framework exposes this
33
+ from langgraph.prebuilt import ToolNode, tools_condition, InjectedState
34
+ import dirtyjson
35
+ import sqlite3
36
+ try:
37
+ from langgraph.checkpoint.sqlite import SqliteSaver
38
+ SQLITE_AVAILABLE = True
39
+ except ImportError:
40
+ SQLITE_AVAILABLE = False
41
+
42
+
43
+
44
+ # ========== 1. ENVIRONMENT & LLM SETUP ==========
45
+ load_dotenv()
46
+ groq_key = os.getenv("GROQ_API_KEY")
47
+ assert groq_key, "GROQ_API_KEY not found in environment!"
48
+ groq_client = OpenAI(
49
+ api_key=os.getenv("GROQ_API_KEY"),
50
+ base_url="https://api.groq.com/openai/v1"
51
+ )
52
+
53
+ def normalize_url(url):
54
+ return url.strip().rstrip('/')
55
+
56
+ def validate_state(state: dict) -> None:
57
+ """
58
+ Validate given state dict against ChatbotState schema.
59
+ Displays result in Streamlit instead of printing.
60
+ """
61
+ # st.write("=== Validating chatbot state ===")
62
+ try:
63
+ ChatbotState.model_validate(state)
64
+ # st.success("✅ State is valid!")
65
+ except ValidationError as e:
66
+ st.error("❌ Validation failed!")
67
+ errors_list = []
68
+ for error in e.errors():
69
+ loc = " → ".join(str(item) for item in error['loc'])
70
+ msg = error['msg']
71
+ errors_list.append(f"- At: {loc}\n Error: {msg}")
72
+ st.write("\n".join(errors_list))
73
+ # Optionally show raw validation error too:
74
+ st.expander("See raw validation error").write(str(e))
75
+ st.stop()
76
+
77
+
78
+ user_memory = UserMemory()
79
+
80
+ # ========== 7. AGENT FUNCTIONS ==========
81
+
82
+ def profile_analysis_prompt(profile: Dict[str, str]) -> str:
83
+ return f"""
84
+ You are a top-tier LinkedIn career coach and AI analyst.
85
+
86
+ Analyze the following candidate profile carefully.
87
+
88
+ Candidate profile data:
89
+ FullName: {profile.get("FullName", "")}
90
+ Headline: {profile.get("Headline", "")}
91
+ JobTitle: {profile.get("JobTitle", "")}
92
+ CompanyName: {profile.get("CompanyName", "")}
93
+ CompanyIndustry: {profile.get("CompanyIndustry", "")}
94
+ CurrentJobDuration: {profile.get("CurrentJobDuration", "")}
95
+ About: {profile.get("About", "")}
96
+ Experiences: {profile.get("Experiences", "")}
97
+ Skills: {profile.get("Skills", "")}
98
+ Educations: {profile.get("Educations", "")}
99
+ Certifications: {profile.get("Certifications", "")}
100
+ HonorsAndAwards: {profile.get("HonorsAndAwards", "")}
101
+ Verifications: {profile.get("Verifications", "")}
102
+ Highlights: {profile.get("Highlights", "")}
103
+ Projects: {profile.get("Projects", "")}
104
+ Publications: {profile.get("Publications", "")}
105
+ Patents: {profile.get("Patents", "")}
106
+ Courses: {profile.get("Courses", "")}
107
+ TestScores: {profile.get("TestScores", "")}
108
+
109
+
110
+ Identify and summarize:
111
+ 1. strengths:
112
+ - technical strengths (skills, tools, frameworks)
113
+ - project strengths (impactful projects, innovation)
114
+ - educational strengths (degrees, certifications, awards)
115
+ - soft skills and personality traits (teamwork, leadership)
116
+ 2. weaknesses:
117
+ - missing or weak technical skills
118
+ - gaps in projects, experience, or education
119
+ - unclear profile sections or missing context
120
+ 3. actionable suggestions:
121
+ - concrete ways to improve profile headline, about section, or add projects
122
+ - suggestions to learn or highlight new skills
123
+ - ideas to make the profile more attractive for recruiters
124
+
125
+ Important instructions:
126
+ - Respond ONLY with valid JSON.
127
+ - Do NOT include text before or after JSON.
128
+ - Be concise but detailed.
129
+
130
+
131
+
132
+ Example JSON format:
133
+ {{
134
+ "strengths": {{
135
+ "technical": ["...", "..."],
136
+ "projects": ["...", "..."],
137
+ "education": ["...", "..."],
138
+ "soft_skills": ["...", "..."]
139
+ }},
140
+ "weaknesses": {{
141
+ "technical_gaps": ["...", "..."],
142
+ "project_or_experience_gaps": ["...", "..."],
143
+ "missing_context": ["...", "..."]
144
+ }},
145
+ "suggestions": [
146
+ "...",
147
+ "...",
148
+ "..."
149
+ ]
150
+ }}
151
+ """.strip()
152
+
153
+
154
+
155
+
156
+ def job_fit_prompt(sections: Dict[str, str], target_role: str) -> str:
157
+ return f"""
158
+ You are an expert career coach and recruiter.
159
+
160
+ Compare the following candidate profile against the typical requirements for the role of "{target_role}".
161
+
162
+ Candidate Profile:
163
+ - Headline: {sections.get('headline', '')}
164
+ - About: {sections.get('about', '')}
165
+ - Job Title: {sections.get('job_title', '')}
166
+ - Company: {sections.get('company_name', '')}
167
+ - Industry: {sections.get('company_industry', '')}
168
+ - Current Job Duration: {sections.get('current_job_duration', '')}
169
+ - Skills: {sections.get('skills', '')}
170
+ - Projects: {sections.get('projects', '')}
171
+ - Educations: {sections.get('educations', '')}
172
+ - Certifications: {sections.get('certifications', '')}
173
+ - Honors & Awards: {sections.get('honors_and_awards', '')}
174
+ - Experiences: {sections.get('experiences', '')}
175
+
176
+ **Instructions:**
177
+ - Respond ONLY with valid JSON.
178
+ - Your JSON must exactly match the following schema:
179
+ {{
180
+ "match_score": 85,
181
+ "missing_skills": ["Skill1", "Skill2"],
182
+ "suggestions": ["...", "...", "..."]
183
+ }}
184
+ - "match_score": integer from 0–100 estimating how well the profile fits the target role.
185
+ - "missing_skills": key missing or weakly mentioned skills.
186
+ - "suggestions": 3 actionable recommendations to improve fit (e.g., learn tools, rewrite headline).
187
+
188
+ Do NOT include explanations, text outside JSON, or markdown.
189
+ Start with '{{' and end with '}}'.
190
+ The JSON must be directly parseable.
191
+ """.strip()
192
+
193
+
194
+ # --- Tool: Profile Analyzer ---
195
+ @tool
196
+ def profile_analyzer(state: Annotated[ChatbotState, InjectedState]) -> dict:
197
+ """
198
+ Tool: Analyze the user's full profile and return its strengths, weaknesses, and suggestions.
199
+ Call this only when a complete profile analysis is needed.
200
+ Returns the analysis as JSON.
201
+
202
+ - Takes no arguments.
203
+ """
204
+
205
+
206
+ # Get summarized profile (dictionary of strings)
207
+ profile = getattr(state, "profile", {}) or {}
208
+
209
+ # Build prompt
210
+ prompt = profile_analysis_prompt(profile)
211
+
212
+ # Call the LLM & parse structured result
213
+ analysis_model = call_llm_and_parse(groq_client,prompt, ProfileAnalysisModel)
214
+ analysis_dict = analysis_model.model_dump()
215
+
216
+ # Save to state and user memory
217
+ state.profile_analysis = analysis_dict
218
+ user_memory.save("profile_analysis", analysis_dict)
219
+
220
+ print("💾 [DEBUG] Saved analysis to user memory.")
221
+ print("📦 [DEBUG] Updated state.profile_analysis with analysis.")
222
+
223
+ return analysis_dict
224
+
225
+ # --- Tool: Job Matcher ---
226
+
227
+
228
+ @tool
229
+ def job_matcher(
230
+ state: Annotated[ChatbotState, InjectedState],
231
+ target_role: Optional[str] = None
232
+ ) -> dict:
233
+ """
234
+ Tool: Analyze how well the user's profile fits the target role.
235
+ - Call this when the user asks whether they are a good fit for a certain role, or wants to check how compatible their profile is with that role.
236
+ - Takes target_role as an argument.
237
+ - This tool is needed when a match score, missing skills, and suggestions are required for a given job title.
238
+ """
239
+ print(f"target role is {target_role}")
240
+ # Update state.target_role if provided
241
+
242
+ sections = getattr(state, "sections", {})
243
+
244
+ # Build prompt
245
+ prompt = job_fit_prompt(sections, target_role)
246
+
247
+ # Call LLM and parse
248
+ try:
249
+ job_fit_model = call_llm_and_parse(groq_client,prompt, JobFitModel)
250
+ job_fit_dict = job_fit_model.model_dump()
251
+ job_fit_dict["target_role"] = target_role
252
+ except Exception as e:
253
+ job_fit_dict = {
254
+ "target_role":target_role,
255
+ "match_score": 0,
256
+ "missing_skills": [],
257
+ "suggestions": ["Parsing failed or incomplete response."]
258
+ }
259
+
260
+ # Save to state and user memory
261
+ state.job_fit = job_fit_dict
262
+ user_memory.save("job_fit", job_fit_dict)
263
+
264
+ return job_fit_dict
265
+
266
+
267
+
268
+
269
+
270
+
271
+ @tool
272
+ def extract_from_state_tool(
273
+ state: Annotated[ChatbotState, InjectedState],
274
+ key: str
275
+ ) -> dict:
276
+ """
277
+ Use this tool when the user asks about a particular part of their profile and a single section is targeted. It expects a key argument that identifies which
278
+ part of the profile the user wants to look at.
279
+ Argument:
280
+ key: pass exactly one value from the list below, matching the one thing the user wants to look into:
281
+ "sections.about", "sections.headline", "sections.skills", "sections.projects",
282
+ "sections.educations", "sections.certifications", "sections.honors_and_awards",
283
+ "sections.experiences", "sections.publications", "sections.patents",
284
+ "sections.courses", "sections.test_scores", "sections.verifications",
285
+ "sections.highlights", "sections.job_title", "sections.company_name",
286
+ "sections.company_industry", "sections.current_job_duration", "sections.full_name",
287
+ "enhanced_content", "profile_analysis", "job_fit", "target_role", "editing_section"
288
+ """
289
+ value = state
290
+ try:
291
+ for part in key.split('.'):
292
+ # Support both dict and Pydantic model
293
+ if isinstance(value, dict):
294
+ value = value.get(part)
295
+ elif hasattr(value, part):
296
+ value = getattr(value, part)
297
+ else:
298
+ value = None
299
+ if value is None:
300
+ break
301
+ except Exception:
302
+ value = None
303
+ return {"result": value}
304
+
305
+
306
+ tools = [
307
+ profile_analyzer,
308
+ job_matcher,
309
+ extract_from_state_tool
310
+ ]
311
+ llm = ChatOpenAI(
312
+ api_key=groq_key,
313
+ base_url="https://api.groq.com/openai/v1",
314
+ model="llama3-8b-8192",
315
+ temperature=0
316
+ )
317
+ llm_with_tools = llm.bind_tools(tools)
318
+
319
+
320
+
321
+ # ========== 8. LANGGRAPH PIPELINE ==========
322
+
323
+
324
+ def chatbot_node(state: ChatbotState) -> ChatbotState:
325
+ validate_state(state)
326
+
327
+ messages = state.get("messages", [])
328
+
329
+ system_prompt = """
330
+ You are a helpful AI assistant specialized in LinkedIn profile coaching.
331
+
332
+ You can:
333
+ - Answer user questions.
334
+ - If the user greets you, greet them back and briefly explain how you can help.
335
+ - You should proactively use specialized tools whenever possible to give richer, data-driven answers.
336
+ IMPORTANT RULES:
337
+ - You must call at most one tool at a time.
338
+ - Never call multiple tools together in the same step.
339
+ - If user asks to show any section, use extract_from_state_tool, and after that, show the exact result from it.
340
+ - If information about that section is already known, don't call extract_from_state_tool; answer the user query directly.
341
+ - Call profile_analyzer only when a full profile analysis is needed; otherwise rely on extract_from_state_tool.
342
+ - If the user asks to enhance any section, check whether it is already in the history; otherwise call extract_from_state_tool first.
343
+ - Prefer to call a tool when answering instead of directly replying, especially if it can add new, useful insights or up-to-date data.
344
+ - If a tool has been recently used and new info isn’t needed, you may answer directly.
345
+ - Use tools to verify assumptions, enrich answers, or when the user asks about strengths, weaknesses, job fit, or wants improvements.
346
+
347
+ Always respond helpfully, clearly, and with actionable advice to guide the user in improving their LinkedIn profile.
348
+ """
349
+
350
+ # Build messages & invoke LLM
351
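+ # Keep only the system prompt plus the two most recent messages to limit the context sent to the LLM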
+ messages = [SystemMessage(content=system_prompt)] + messages[-2:]
352
+ # messages = [SystemMessage(content=system_prompt)]
353
+ response = llm_with_tools.invoke(messages)
354
+ if hasattr(response, "tool_calls") and response.tool_calls:
355
+ first_tool = response.tool_calls[0]
356
+ tool_name = first_tool.get("name") if isinstance(first_tool, dict) else getattr(first_tool, "name", None)
357
+ tool_args = first_tool.get("args") if isinstance(first_tool, dict) else getattr(first_tool, "args", {})
358
+ print(f"[DEBUG] Using tool {tool_name}")
359
+
360
+ # DEBUG
361
+ print("[DEBUG] LLM response:", response)
362
+ state.setdefault("messages", []).append(response)
363
+
364
+ return state
365
+
366
+
367
+
368
+
369
+
370
+ # --- Graph definition ---
371
+ graph = StateGraph(state_schema=ChatbotState)
372
+ graph.add_node("chatbot", chatbot_node)
373
+ graph.add_node("tools", ToolNode(tools))
374
+ graph.add_edge(START, "chatbot")
375
+ graph.add_conditional_edges("chatbot", tools_condition)
376
+ graph.add_edge("tools","chatbot")
377
+ graph.set_entry_point("chatbot")
378
+
379
+ # --- Streamlit UI ---
380
+ st.set_page_config(page_title="💼 LinkedIn AI Career Assistant", page_icon="🤖", layout="wide")
381
+ st.title("🧑‍💼 LinkedIn AI Career Assistant")
382
+
383
+ # --- Checkpointer and graph initialization ---
384
+ if "checkpointer" not in st.session_state:
385
+ if SQLITE_AVAILABLE:
386
+ conn = sqlite3.connect("checkpoints1.db", check_same_thread=False)
387
+ st.session_state["checkpointer"] = SqliteSaver(conn)
388
+ else:
389
+ st.session_state["checkpointer"] = MemorySaver()
390
+ checkpointer = st.session_state["checkpointer"]
391
+
392
+ if "app_graph" not in st.session_state:
393
+ st.session_state["app_graph"] = graph.compile(checkpointer=checkpointer)
394
+ app_graph = st.session_state["app_graph"]
395
+ # Find or create thread
396
+ def find_thread_id_for_url(checkpointer, url, max_threads=100):
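+ # Scan thread IDs 0..max_threads-1 and return the (thread_id, state) whose saved profile_url matches this URL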
397
+ search_url = normalize_url(url)
398
+ for tid in range(max_threads):
399
+ config = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}}
400
+ state = checkpointer.get(config)
401
+ if state and "channel_values" in state:
402
+ user_state = state["channel_values"]
403
+ stored_url = normalize_url(user_state.get("profile_url", ""))
404
+ if stored_url == search_url:
405
+ return str(tid), user_state
406
+ return None, None
407
+
408
+ def delete_thread_checkpoint(checkpointer, thread_id):
409
+ # For SqliteSaver, use the delete_thread method if available
410
+ if hasattr(checkpointer, "delete_thread"):
411
+ checkpointer.delete_thread(thread_id)
412
+ else:
413
+ # For in-memory or custom checkpointers, implement as needed
414
+ pass
415
+
416
+
417
+ def get_next_thread_id(checkpointer, max_threads=100):
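+ # Return the lowest thread_id (as a string) that has no existing checkpoint yet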
418
+ used = set()
419
+ for tid in range(max_threads):
420
+ config = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}}
421
+ if checkpointer.get(config):
422
+ used.add(tid)
423
+ for tid in range(max_threads):
424
+ if tid not in used:
425
+ return str(tid)
426
+ raise RuntimeError("No available thread_id")
427
+
428
+ # --- Session selection and state initialization ---
429
+
430
+ if "chat_mode" not in st.session_state:
431
+ profile_url = st.text_input("Profile URL (e.g., https://www.linkedin.com/in/username/)")
432
+ if not profile_url:
433
+ st.info("Please enter a valid LinkedIn profile URL above to start.")
434
+ st.stop()
435
+
436
+ valid_pattern = r"^https://www\.linkedin\.com/in/[^/]+/?$"
437
+ if not re.match(valid_pattern, profile_url.strip()):
438
+ st.error("❌ Invalid LinkedIn profile URL. Make sure it matches the format.")
439
+ st.stop()
440
+ url = profile_url.strip()
441
+
442
+ existing_thread_id, previous_state = find_thread_id_for_url(checkpointer, url)
443
+ # Defensive: ensure required fields
444
+ required_fields = ["profile", "sections"]
445
+ if previous_state and not all(f in previous_state and previous_state[f] for f in required_fields):
446
+ st.warning("Previous session is missing required data. Please start a new chat.")
447
+ previous_state = None
448
+
449
+ if previous_state:
450
+ st.info("A previous session was found. Choose:")
451
+ col1, col2 = st.columns(2)
452
+ if col1.button("Continue previous chat"):
453
+ st.session_state["chat_mode"] = "continue"
454
+ st.session_state["thread_id"] = existing_thread_id
455
+ st.session_state.state = previous_state
456
+ st.rerun()
457
+ elif col2.button("Start new chat"):
458
+ delete_thread_checkpoint(checkpointer, existing_thread_id)
459
+ with st.spinner("Fetching and processing profile... ⏳"):
460
+ raw=scrape_linkedin_profile(url)
461
+ thread_id = existing_thread_id
462
+ st.session_state["chat_mode"] = "new"
463
+ st.session_state["thread_id"] = thread_id
464
+ st.session_state.state = initialize_state(raw)
465
+ st.session_state.state["profile_url"] = normalize_url(url)
466
+ st.session_state.state["messages"] = []
467
+ st.rerun()
468
+ st.stop()
469
+ else:
470
+ with st.spinner("Fetching and processing profile... ⏳"):
471
+ raw=scrape_linkedin_profile(url)
472
+ thread_id = get_next_thread_id(checkpointer)
473
+ st.session_state["thread_id"] = thread_id
474
+ st.session_state["chat_mode"] = "new"
475
+ st.session_state.state = initialize_state(raw)
476
+ st.session_state.state["profile_url"] = normalize_url(url)
477
+ st.session_state.state["messages"] = []
478
+ st.rerun()
479
+
480
+ # --- Main chat UI (only after chat_mode is set) ---
481
+ state = st.session_state.state
482
+ thread_id = st.session_state.get("thread_id")
483
+
484
+ st.subheader("💬 Chat with your AI Assistant")
485
+ messages = state.get("messages", [])
486
+ chat_container = st.container()
487
+
488
+ with chat_container:
489
+ st.markdown(
490
+ """
491
+ <style>
492
+ .chat-row { display: flex; width: 100%; margin-bottom: 12px; animation: fadeIn 0.5s; }
493
+ .chat-row.user { justify-content: flex-end; }
494
+ .chat-row.ai { justify-content: flex-start; }
495
+ .chat-bubble { font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif; font-size: 1.08rem; line-height: 1.65; padding: 14px 22px; border-radius: 20px; min-width: 60px; max-width: 75vw; box-shadow: 0 2px 12px rgba(0,0,0,0.10); word-break: break-word; display: inline-block; position: relative; margin-bottom: 2px; }
496
+ .bubble-user { background: linear-gradient(90deg, #43e97b 0%, #38f9d7 100%); color: #fff; border-bottom-right-radius: 6px; border-top-right-radius: 22px; text-align: right; box-shadow: 0 4px 16px rgba(67,233,123,0.13); }
497
+ .bubble-ai { background: linear-gradient(90deg, #e3f0ff 0%, #c9eaff 100%); color: #1a237e; border-bottom-left-radius: 6px; border-top-left-radius: 22px; text-align: left; border: 1.5px solid #b3e0fc; box-shadow: 0 4px 16px rgba(44, 62, 80, 0.08); }
498
+ .bubble-unknown { background: #fffbe6; color: #8a6d3b; border-radius: 14px; text-align: center; border: 1px solid #ffe082; display: inline-block; }
499
+ .sender-label { font-size: 0.93em; font-weight: 600; opacity: 0.7; margin-bottom: 4px; display: block; }
500
+ .avatar { width: 38px; height: 38px; border-radius: 50%; margin-right: 10px; margin-top: 2px; background: #e0e0e0; object-fit: cover; box-shadow: 0 2px 6px rgba(0,0,0,0.07); }
501
+ @keyframes fadeIn { from { opacity: 0; transform: translateY(12px);} to { opacity: 1; transform: translateY(0);} }
502
+ </style>
503
+ """,
504
+ unsafe_allow_html=True,
505
+ )
506
+
507
+ job_fit = state.get("job_fit")
508
+ for msg in messages:
509
+ if isinstance(msg, HumanMessage):
510
+ st.markdown(
511
+ f"""
512
+ <div class="chat-row user">
513
+ <div class="chat-bubble bubble-user">
514
+ <span class="sender-label">🧑‍💻 You</span>
515
+ {msg.content}
516
+ </div>
517
+ </div>
518
+ """,
519
+ unsafe_allow_html=True,
520
+ )
521
+ elif isinstance(msg, AIMessage):
522
+ if not msg.content or not msg.content.strip():
523
+ continue
524
+ st.markdown(
525
+ f"""
526
+ <div class="chat-row ai">
527
+ <img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="AI"/>
528
+ <div class="chat-bubble bubble-ai">
529
+ <span class="sender-label">🤖 AI</span>
530
+ {msg.content}
531
+ </div>
532
+ </div>
533
+ """,
534
+ unsafe_allow_html=True,
535
+ )
536
+ elif isinstance(msg, ToolMessage):
537
+ raw_content = msg.content or "(no content)"
538
+ try:
539
+ parsed = json.loads(raw_content)
540
+ except Exception:
541
+ parsed = None
542
+
543
+ if parsed and isinstance(parsed, dict):
544
+ # --- Profile analysis format ---
545
+ if all(k in parsed for k in ("strengths", "weaknesses", "suggestions")):
546
+ strengths = parsed["strengths"]
547
+ weaknesses = parsed["weaknesses"]
548
+ suggestions = parsed["suggestions"]
549
+
550
+ formatted = (
551
+ "### 💪 **Strengths**\n"
552
+ f"- **Technical:** {', '.join(strengths.get('technical', []) or ['None'])}\n"
553
+ f"- **Projects:** {', '.join(strengths.get('projects', []) or ['None'])}\n"
554
+ f"- **Education:** {', '.join(strengths.get('education', []) or ['None'])}\n"
555
+ f"- **Soft Skills:** {', '.join(strengths.get('soft_skills', []) or ['None'])}\n\n"
556
+ "### ⚠️ **Weaknesses**\n"
557
+ f"- **Technical Gaps:** {', '.join(weaknesses.get('technical_gaps', []) or ['None'])}\n"
558
+ f"- **Project/Experience Gaps:** {', '.join(weaknesses.get('project_or_experience_gaps', []) or ['None'])}\n"
559
+ f"- **Missing Context:** {', '.join(weaknesses.get('missing_context', []) or ['None'])}\n\n"
560
+ "### 🛠 **Suggestions to improve**\n"
561
+ + "\n".join(f"- {s}" for s in suggestions)
562
+ )
563
+
564
+ st.markdown(f"""
565
+ <div class="chat-row ai">
566
+ <img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
567
+ <div class="chat-bubble bubble-ai">
568
+ <span class="sender-label">📊 Profile Analysis</span>
569
+ {formatted}
570
+ </div>
571
+ </div>
572
+ """, unsafe_allow_html=True)
573
+
574
+ # --- Job fit format ---
575
+ elif "match_score" in parsed:
576
+ percent = parsed["match_score"]
577
+ suggestions = parsed.get("suggestions", [])
578
+ missing = parsed.get("missing_skills", [])
579
+ target_role = parsed.get('target_role', 'unspecified')
580
+ state["target_role"]=target_role
581
+ suggestions_html = "<br>".join(f"• {s}" for s in suggestions)
582
+ missing_html = "<br>".join(f"• {s}" for s in missing)
583
+
584
+ st.markdown(f"""
585
+ <div class="chat-row ai">
586
+ <img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
587
+ <div class="chat-bubble bubble-ai">
588
+ <span class="sender-label">📊 Job Fit</span>
589
+ <b>🎯 Target Role:</b> {target_role}<br>
590
+ <div style="
591
+ width: 120px; height: 120px; border-radius: 50%;
592
+ background: conic-gradient(#25D366 {percent * 3.6}deg, #e0e0e0 0deg);
593
+ display: flex; align-items: center; justify-content: center;
594
+ font-size: 1.8rem; color: #333; margin: 10px auto;">
595
+ {percent}%
596
+ </div>
597
+ <b>Missing Skills:</b><br>{missing_html}<br><br>
598
+ <b>Suggestions:</b><br>{suggestions_html}
599
+ </div>
600
+ </div>
601
+ """, unsafe_allow_html=True)
602
+
603
+ # --- Section text format ---
604
+ elif "result" in parsed:
605
+ text = parsed["result"]
606
+ st.markdown(f"""
607
+ <div class="chat-row ai">
608
+ <img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
609
+ <div class="chat-bubble bubble-ai">
610
+ <span class="sender-label">📄 Section Content</span>
611
+ {text}
612
+ </div>
613
+ </div>
614
+ """, unsafe_allow_html=True)
615
+
616
+ else:
617
+ st.markdown(
618
+ f"""
619
+ <div class="chat-row">
620
+ <div class="chat-bubble bubble-unknown">
621
+ <span class="sender-label">⚠️ Unknown</span>
622
+ {getattr(msg, 'content', str(msg))}
623
+ </div>
624
+ </div>
625
+ """,
626
+ unsafe_allow_html=True,
627
+ )
628
+ st.markdown('<div style="clear:both"></div>', unsafe_allow_html=True)
629
+
630
+ st.markdown("---")
631
+
632
+ user_input = st.chat_input(
633
+ placeholder="Ask about your LinkedIn profile, e.g., 'Analyze my profile, how do I fit for AI role, how is my about section?'"
634
+ )
635
+
636
+ if user_input and user_input.strip():
637
+ state.setdefault("messages", []).append(HumanMessage(content=user_input.strip()))
638
+ validate_state(state)
639
+ thread_id = st.session_state.get("thread_id")
640
+ config = {"configurable": {"thread_id": thread_id}}
641
+ with st.spinner("Processing your request..."):
642
+ st.session_state.state = app_graph.invoke(state, config)
643
+ st.rerun()
chatbot_model.py ADDED
@@ -0,0 +1,130 @@
1
+ from typing import List, Dict, Any, Optional, Annotated
2
+ from pydantic import BaseModel, Field
3
+ from langchain_core.messages import BaseMessage
4
+ from langgraph.graph import add_messages
5
+ class ChatbotState(BaseModel):
6
+ def get(self, key, default=None):
7
+ """
8
+ Allow dict-like .get() access for compatibility.
9
+ """
10
+ # First try attribute directly
11
+ if hasattr(self, key):
12
+ return getattr(self, key)
13
+ # Fallback: check if it's in __dict__
14
+ return self.__dict__.get(key, default)
15
+ def setdefault(self, key, default):
16
+ """
17
+ Dict-like setdefault: if attribute is None, set it to default and return it.
18
+ Otherwise, return existing value.
19
+ """
20
+ if hasattr(self, key):
21
+ value = getattr(self, key)
22
+ if value is None:
23
+ setattr(self, key, default)
24
+ return default
25
+ return value
26
+ else:
27
+ # attribute does not exist: set it
28
+ setattr(self, key, default)
29
+ return default
30
+ profile: Dict[str, Any] = Field(..., description="Preprocessed / summarized profile data")
31
+ profile_url: Optional[str] = Field(
32
+ default=None,
33
+ description="Original LinkedIn profile URL provided by the user."
34
+ )
35
+
36
+ # Quick access sections (about, headline, skills etc.)
37
+ sections: Dict[str, str] = Field(..., description="Flattened profile sections for quick access")
38
+
39
+ # Enhancements and analysis results
40
+ enhanced_content: Dict[str, str] = Field(
41
+ default_factory=dict,
42
+ description=(
43
+ "Map of improved or rewritten profile sections generated by the ContentGenerator tool. "
44
+ "Keys are section names (e.g., 'about', 'headline'); values are enhanced text."
45
+ )
46
+ )
47
+
48
+ profile_analysis: Optional[Dict[str, Any]] = Field(
49
+ None,
50
+ description=(
51
+ "Structured analysis of the user's profile produced by the ProfileAnalyzer tool, "
52
+ "including strengths, weaknesses, and actionable suggestions."
53
+ )
54
+ )
55
+
56
+ job_fit: Optional[Dict[str, Any]] = Field(
57
+ None,
58
+ description=(
59
+ "Assessment result from the JobMatcher tool, detailing how well the user's profile matches "
60
+ "the target role, including missing skills and match score."
61
+ )
62
+ )
63
+
64
+ target_role: Optional[str] = Field(
65
+ None,
66
+ description=(
67
+ "Target job role the user is aiming for. "
68
+ "Can be set by the user directly during the conversation or inferred by the chatbot."
69
+ )
70
+ )
71
+
72
+ editing_section: Optional[str] = Field(
73
+ None,
74
+ description=(
75
+ "Name of the profile section currently being edited or improved, "
76
+ "set dynamically when the ContentGenerator tool is invoked."
77
+ )
78
+ )
79
+ next_tool_name: Optional[str] = Field(
80
+ default=None,
81
+ description="Name of the next tool the chatbot wants to call, set dynamically after LLM response."
82
+ )
83
+
84
+
85
+ # Annotated chat history directly using BaseMessage
86
+ messages: Annotated[List[BaseMessage], add_messages] = Field(
87
+ default_factory=list,
88
+ description="List of user and assistant messages"
89
+ )
90
+
91
+
92
+
93
+ class ProfileAnalysisStrengths(BaseModel):
94
+ technical: List[str]
95
+ projects: List[str]
96
+ education: List[str]
97
+ soft_skills: List[str]
98
+
99
+ class ProfileAnalysisWeaknesses(BaseModel):
100
+ technical_gaps: List[str]
101
+ project_or_experience_gaps: List[str]
102
+ missing_context: List[str]
103
+
104
+ class ProfileAnalysisModel(BaseModel):
105
+ strengths: ProfileAnalysisStrengths
106
+ weaknesses: ProfileAnalysisWeaknesses
107
+ suggestions: List[str]
108
+
109
+ class JobFitModel(BaseModel):
110
+ match_score: int = Field(..., ge=0, le=100)
111
+ missing_skills: List[str]
112
+ suggestions: List[str]
113
+
114
+ class ContentGenerationModel(BaseModel):
115
+ new_content: str
116
+
117
+
118
+ # ========== 6. MEMORY SETUP ==========
119
+
120
+ class UserMemory:
121
+ def __init__(self):
122
+ self.profile = None
123
+ self.target_roles = []
124
+ self.history = []
125
+
126
+ def save(self, key, value):
127
+ self.history.append((key, value))
128
+
129
+ def get_history(self):
130
+ return self.history
llm_utils.py ADDED
@@ -0,0 +1,83 @@
1
+ import time
2
+ from typing import Type, Union, Dict, Any
3
+ from pydantic import BaseModel
4
+ import dirtyjson
5
+ import re
6
+ # Make sure you install dirtyjson: pip install dirtyjson
7
+
8
+ # === Optionally, import your Groq client from where you configure it ===
9
+
10
+ # === Helper function ===
11
+
12
+ def call_llm_and_parse(
13
+ groq_client,
14
+ prompt: str,
15
+ model: Type[BaseModel],
16
+ max_retries: int = 3,
17
+ delay: float = 1.0
18
+ ) -> Union[BaseModel, Dict[str, Any]]:
19
+ """
20
+ Call LLM with a prompt, parse the JSON response, and validate it using a Pydantic model.
21
+
22
+ Args:
23
+ prompt (str): The prompt to send to the LLM.
24
+ model (Type[BaseModel]): The Pydantic model to validate against.
25
+ max_retries (int, optional): Number of retries on failure. Default is 3.
26
+ delay (float, optional): Delay (in seconds) between retries, multiplied by attempt count.
27
+
28
+ Returns:
29
+ BaseModel: Validated Pydantic model instance if successful.
30
+ dict: Contains 'error' and 'raw' fields if validation fails after retries.
31
+ """
32
+ for attempt in range(1, max_retries + 1):
33
+ try:
34
+ print(f"[call_llm_and_parse] Attempt {attempt}: sending prompt to LLM...")
35
+
36
+ completion = groq_client.chat.completions.create(
37
+ model="llama3-8b-8192",
38
+ messages=[{"role": "user", "content": prompt}],
39
+ temperature=0.3,
40
+ max_tokens=800
41
+ )
42
+
43
+ response_text = completion.choices[0].message.content
44
+ print(f"[call_llm_and_parse] Raw LLM response: {response_text[:200]}...") # first 200 chars
45
+
46
+ # Extract JSON (handle dirty or partial JSON)
47
+ json_str = extract_and_repair_json(response_text)
48
+
49
+ # Parse JSON using dirtyjson
50
+ parsed = dirtyjson.loads(json_str)
51
+
52
+ # Validate with Pydantic
53
+ validated = model.model_validate(parsed)
54
+
55
+ print("[call_llm_and_parse] Successfully parsed and validated.")
56
+ return validated
57
+
58
+ except Exception as e:
59
+ print(f"[Retry {attempt}] Error: {e}")
60
+ if attempt < max_retries:
61
+ time.sleep(delay * attempt)
62
+ else:
63
+ print("[call_llm_and_parse] Failed after retries.")
64
+ return {
65
+ "error": f"Validation failed after {max_retries} retries: {e}",
66
+ "raw": json_str if 'json_str' in locals() else response_text
67
+ }
68
+
69
+
70
+ def extract_and_repair_json(text: str) -> str:
71
+ """
72
+ Extracts JSON starting from first '{' and balances braces.
73
+ """
74
+ match = re.search(r'\{[\s\S]*', text)
75
+ if not match:
76
+ raise ValueError("No JSON object found.")
77
+ json_str = match.group()
78
+ # Fix unmatched braces
79
+ open_braces = json_str.count('{')
80
+ close_braces = json_str.count('}')
81
+ if open_braces > close_braces:
82
+ json_str += '}' * (open_braces - close_braces)
83
+ return json_str
profile_preprocessing.py ADDED
@@ -0,0 +1,127 @@
1
+ from typing import List, Dict, Any
2
+ from urllib.parse import urlparse
3
+ # ========== 3. PROFILE PREPROCESSING HELPERS ==========
4
+ def normalize_url(url):
5
+ return url.strip().rstrip('/')
6
+
7
+ def summarize_skills(skills: List[Dict]) -> str:
8
+ return ', '.join([s.get('title', '') for s in skills if s.get('title')])
9
+
10
+ def summarize_projects(projects: List[Dict]) -> str:
11
+ summaries = []
12
+ for p in projects:
13
+ title = p.get('title', '')
14
+ desc = ''
15
+ if p.get('subComponents'):
16
+ for comp in p['subComponents']:
17
+ for d in comp.get('description', []):
18
+ if d.get('type') == 'textComponent':
19
+ desc += d.get('text', '') + ' '
20
+ summaries.append(f"{title}: {desc.strip()}")
21
+ return '\n'.join(summaries)
22
+
23
+ def summarize_educations(educations: List[Dict]) -> str:
24
+ return ', '.join([
25
+ f"{e.get('title', '')} ({e.get('subtitle', '')}, {e.get('caption', '')})"
26
+ for e in educations if e.get('title')
27
+ ])
28
+
29
+ def summarize_certs(certs: List[Dict]) -> str:
30
+ return ', '.join([
31
+ f"{c.get('title', '')} ({c.get('subtitle', '')}, {c.get('caption', '')})"
32
+ for c in certs if c.get('title')
33
+ ])
34
+
35
+ def summarize_test_scores(scores: List[Dict]) -> str:
36
+ return ', '.join([
37
+ f"{s.get('title', '')} ({s.get('subtitle', '')})"
38
+ for s in scores if s.get('title')
39
+ ])
40
+
41
+ def summarize_generic(items: List[Dict], key='title') -> str:
42
+ return ', '.join([item.get(key, '') for item in items if item.get(key)])
43
+
44
+
45
+ # === Preprocess raw profile into summarized profile ===
46
+ def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
47
+ return {
48
+ "FullName": raw_profile.get("fullName", ""),
49
+ "profile_url": raw_profile.get("linkedinUrl",""),
50
+ "Headline": raw_profile.get("headline", ""),
51
+ "JobTitle": raw_profile.get("jobTitle", ""),
52
+ "CompanyName": raw_profile.get("companyName", ""),
53
+ "CompanyIndustry": raw_profile.get("companyIndustry", ""),
54
+ "CurrentJobDuration": str(raw_profile.get("currentJobDuration", "")),
55
+ "About": raw_profile.get("about", ""),
56
+ "Experiences": summarize_generic(raw_profile.get("experiences", []), key='title'),
57
+ "Skills": summarize_skills(raw_profile.get("skills", [])),
58
+ "Educations": summarize_educations(raw_profile.get("educations", [])),
59
+ "Certifications": summarize_certs(raw_profile.get("licenseAndCertificates", [])),
60
+ "HonorsAndAwards": summarize_generic(raw_profile.get("honorsAndAwards", []), key='title'),
61
+ "Verifications": summarize_generic(raw_profile.get("verifications", []), key='title'),
62
+ "Highlights": summarize_generic(raw_profile.get("highlights", []), key='title'),
63
+ "Projects": summarize_projects(raw_profile.get("projects", [])),
64
+ "Publications": summarize_generic(raw_profile.get("publications", []), key='title'),
65
+ "Patents": summarize_generic(raw_profile.get("patents", []), key='title'),
66
+ "Courses": summarize_generic(raw_profile.get("courses", []), key='title'),
67
+ "TestScores": summarize_test_scores(raw_profile.get("testScores", []))
68
+ }
69
+
70
+ # === Create & fill state ===
71
+
72
+
73
+ def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str,Any]:
74
+ """
75
+ Initializes the chatbot state used in LangGraph:
76
+ - Keeps both raw and processed profile
77
+ - Splits important sections for quick access
78
+ - Initializes placeholders for tool outputs
79
+ - Adds empty chat history for conversation context
80
+ """
81
+ # Your preprocessing function that cleans / normalizes scraped profile
82
+ profile = preprocess_profile(raw_profile)
83
+ print(f"initializing url as {profile['profile_url']}")
84
+
85
+ state: Dict[str, Any] = {
86
+ "profile": profile, # Cleaned & normalized profile
87
+ "profile_url": normalize_url(profile.get("profile_url","") or ""),
88
+
89
+ # === Separate sections (make sure all are strings, never None) ===
90
+ "sections": {
91
+ "about": profile.get("About", "") or "",
92
+ "headline": profile.get("Headline", "") or "",
93
+ "skills": profile.get("Skills", "") or "",
94
+ "projects": profile.get("Projects", "") or "",
95
+ "educations": profile.get("Educations", "") or "",
96
+ "certifications": profile.get("Certifications", "") or "",
97
+ "honors_and_awards": profile.get("HonorsAndAwards", "") or "",
98
+ "experiences": profile.get("Experiences", "") or "",
99
+ "publications": profile.get("Publications", "") or "",
100
+ "patents": profile.get("Patents", "") or "",
101
+ "courses": profile.get("Courses", "") or "",
102
+ "test_scores": profile.get("TestScores", "") or "",
103
+ "verifications": profile.get("Verifications", "") or "",
104
+ "highlights": profile.get("Highlights", "") or "",
105
+ "job_title": profile.get("JobTitle", "") or "",
106
+ "company_name": profile.get("CompanyName", "") or "",
107
+ "company_industry": profile.get("CompanyIndustry", "") or "",
108
+ "current_job_duration": profile.get("CurrentJobDuration", "") or "",
109
+ "full_name": profile.get("FullName", "") or ""
110
+ },
111
+
112
+ # === Placeholders populated by tools ===
113
+ "enhanced_content": {}, # Populated by ContentGenerator tool
114
+ "profile_analysis": None, # Can be None initially (Optional)
115
+ "job_fit": None, # Can be None initially (Optional)
116
+ "target_role": None, # Optional[str]
117
+ "editing_section": None, # Optional[str]
118
+
119
+ # === Chat history ===
120
+ # Pydantic expects list of dicts like {"role": "user", "content": "..."}
121
+ "messages": [],
122
+ "next_tool_name": None
123
+ }
124
+
125
+
126
+ return state
127
+
requirements.txt ADDED
@@ -0,0 +1,31 @@
+ apify-client
+ python-dotenv
+
+ # Core Python packages
+ streamlit
+ pydantic
+
+ # LangChain ecosystem
+ langchain
+ langchain-core
+ langchain-openai
+ langgraph
+ langgraph-checkpoint
+
+ # For OpenAI-compatible LLMs (Groq, etc.)
+ openai
+
+ # For parsing "dirty" JSON
+ dirtyjson
+
+ typing-extensions
+
+ tqdm
scraping_profile.py ADDED
@@ -0,0 +1,42 @@
1
+ from apify_client import ApifyClient
2
+ from dotenv import load_dotenv
3
+ import os
4
+ import json
5
+
6
+ # Load environment variables
7
+ load_dotenv()
8
+
9
+ # Get API token
10
+ api_token = os.getenv("APIFY_API_TOKEN")
11
+
12
+ # Initialize client once (global)
13
+ client = ApifyClient(api_token)
14
+
15
+
16
+ def scrape_linkedin_profile(profile_url: str) -> dict:
17
+ """
18
+ 📄 Scrapes a LinkedIn profile using Apify and returns the data as a Python dict.
19
+ """
20
+ try:
21
+ run_input = {"profileUrls": [profile_url]}
22
+ run = client.actor("dev_fusion/Linkedin-Profile-Scraper").call(run_input=run_input)
23
+
24
+ items = list(client.dataset(run["defaultDatasetId"]).iterate_items())
25
+
26
+ if items:
27
+ with open("scraped_profile.json", "w") as f:
28
+ json.dump(items[0], f, indent=2)
29
+ return items[0]
30
+ else:
31
+ print("⚠️ No data found in dataset.")
32
+ return {}
33
+ except Exception as e:
34
+ print(f"❌ Error during scraping: {e}")
35
+ return {}
36
+
37
+
38
+ # 🧪 OPTIONAL: test code only runs when this file is executed directly
39
+ if __name__ == "__main__":
40
+ test_url = "https://www.linkedin.com/in/sri-vallabh-tammireddy/"
41
+ profile = scrape_linkedin_profile(test_url)
42
+ print(json.dumps(profile, indent=2))