Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- .gitattributes +3 -35
- .gitignore +1 -0
- Dockerfile +28 -0
- README.md +406 -0
- app.py +643 -0
- chatbot_model.py +130 -0
- llm_utils.py +83 -0
- profile_preprocessing.py +127 -0
- requirements.txt +31 -0
- scraping_profile.py +42 -0
.gitattributes
CHANGED
@@ -1,35 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
*.
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
# Auto detect text files and perform LF normalization
|
2 |
+
* text=auto
|
3 |
+
# *.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
Dockerfile
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
# Use an official Python runtime as a parent image
|
4 |
+
FROM python:3.11-slim
|
5 |
+
|
6 |
+
|
7 |
+
# Set the working directory in the container
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
ENV HF_HOME=/data/hf_cache
|
11 |
+
ENV TRANSFORMERS_CACHE=/data/hf_cache/transformers
|
12 |
+
ENV HF_DATASETS_CACHE=/data/hf_cache/datasets
|
13 |
+
ENV HF_HUB_CACHE=/data/hf_cache/hub
|
14 |
+
|
15 |
+
RUN mkdir -p /data/hf_cache/transformers /data/hf_cache/datasets /data/hf_cache/hub && chmod -R 777 /data/hf_cache
|
16 |
+
|
17 |
+
# Copy requirements.txt and install dependencies
|
18 |
+
COPY requirements.txt .
|
19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
20 |
+
|
21 |
+
# Copy the rest of your app's code
|
22 |
+
COPY . .
|
23 |
+
|
24 |
+
# Expose the port Streamlit runs on
|
25 |
+
EXPOSE 8501
|
26 |
+
|
27 |
+
# Run Streamlit
|
28 |
+
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
ADDED
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Linkedin Assistant
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: red
|
6 |
+
sdk: docker
|
7 |
+
app_port: 8501
|
8 |
+
tags:
|
9 |
+
- streamlit
|
10 |
+
pinned: false
|
11 |
+
short_description: Streamlit template space
|
12 |
+
license: mit
|
13 |
+
---
|
14 |
+
|
15 |
+
|
16 |
+
# 🤖 LinkedIn AI Career Assistant
|
17 |
+
|
18 |
+
[](https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/)
|
19 |
+
[](https://www.python.org/downloads/)
|
20 |
+
[](https://streamlit.io/)
|
21 |
+
[](https://langchain-ai.github.io/langgraph/)
|
22 |
+
[](https://groq.com/)
|
23 |
+
|
24 |
+
An intelligent AI-powered career assistant that analyzes LinkedIn profiles, provides job fit analysis, and offers personalized career guidance through an interactive chat interface powered by Groq's llama3-8b-8192 model.
|
25 |
+
|
26 |
+
## 🚀 **Live Demo**
|
27 |
+
|
28 |
+
Try the application live at: **https://sri-vallabh-linkedin-profile-ai-assistant-app-ffuh1c.streamlit.app/**
|
29 |
+
|
30 |
+
## 📋 **Table of Contents**
|
31 |
+
|
32 |
+
- [Overview](#overview)
|
33 |
+
- [Key Features](#key-features)
|
34 |
+
- [Architecture](#architecture)
|
35 |
+
- [Installation](#installation)
|
36 |
+
- [Usage](#usage)
|
37 |
+
- [Technical Implementation](#technical-implementation)
|
38 |
+
- [API Keys Setup](#api-keys-setup)
|
39 |
+
- [Session Management](#session-management)
|
40 |
+
- [Contributing](#contributing)
|
41 |
+
- [License](#license)
|
42 |
+
|
43 |
+
## 🎯 **Overview**
|
44 |
+
|
45 |
+
The LinkedIn AI Career Assistant is a sophisticated career optimization tool that combines Groq's powerful llama3-8b-8192 model with LangGraph's multi-agent framework to provide comprehensive LinkedIn profile analysis. Built using **Streamlit**, **LangGraph**, and **Groq API**, this application offers an interactive chat-based experience for professional career development.
|
46 |
+
|
47 |
+
### **What Makes This Special?**
|
48 |
+
|
49 |
+
- **🧠 Multi-Agent AI System**: Utilizes LangGraph to orchestrate specialized AI tools for different analysis tasks
|
50 |
+
- **💾 Thread-Based Sessions**: Maintains conversation context with intelligent thread management based on LinkedIn URLs
|
51 |
+
- **🎯 Job Fit Analysis**: Provides detailed match scores and improvement suggestions for target roles
|
52 |
+
- **📊 Profile Analysis**: Comprehensive strengths and weaknesses assessment
|
53 |
+
- **🔄 Real-time Scraping**: Fetches live LinkedIn profile data using Apify integration
|
54 |
+
- **⚡ Groq-Powered**: Lightning-fast responses using Groq's optimized llama3-8b-8192 model
|
55 |
+
|
56 |
+
## 🌟 **Key Features**
|
57 |
+
|
58 |
+
### 1. **Interactive Chat Interface**
|
59 |
+
- **LinkedIn URL Input**: Simply paste your LinkedIn profile URL to get started
|
60 |
+
- **Conversational AI**: Natural language interaction for profile optimization
|
61 |
+
- **Real-time Analysis**: Instant feedback and suggestions as you chat
|
62 |
+
- **Custom Styling**: Modern chat bubble interface with professional design
|
63 |
+
|
64 |
+
### 2. **Comprehensive Profile Analysis**
|
65 |
+
- **Strengths Identification**: Highlights technical skills, projects, education, and soft skills
|
66 |
+
- **Weakness Detection**: Identifies gaps in technical skills, experience, and missing context
|
67 |
+
- **Actionable Suggestions**: Provides specific recommendations for profile enhancement
|
68 |
+
- **Section-by-Section Access**: Detailed extraction of individual LinkedIn profile sections
|
69 |
+
|
70 |
+
### 3. **Advanced Job Fit Analysis**
|
71 |
+
- **Match Score Calculation**: Quantifies how well your profile fits target roles (0-100%)
|
72 |
+
- **Skill Gap Analysis**: Identifies missing skills required for your target position
|
73 |
+
- **Role-Specific Feedback**: Tailored suggestions for improving job compatibility
|
74 |
+
- **Visual Score Display**: Circular progress indicators for match percentages
|
75 |
+
|
76 |
+
### 4. **Intelligent Session Management**
|
77 |
+
- **URL-Based Threading**: Automatically finds existing conversations for the same LinkedIn profile
|
78 |
+
- **Session Continuity**: Choose to continue previous chats or start fresh
|
79 |
+
- **SQLite Persistence**: Robust conversation storage with automatic checkpointing
|
80 |
+
- **Thread Isolation**: Secure separation of different user sessions
|
81 |
+
|
82 |
+
### 5. **Professional Data Handling**
|
83 |
+
- **Pydantic Validation**: Robust data validation using structured schemas
|
84 |
+
- **State Management**: Comprehensive state tracking across conversation flows
|
85 |
+
- **Error Handling**: Graceful handling of API failures and data parsing issues
|
86 |
+
- **Memory Optimization**: Efficient storage and retrieval of conversation context
|
87 |
+
|
88 |
+
## 🏗️ **Architecture**
|
89 |
+
|
90 |
+
### **Multi-Agent System Design**
|
91 |
+
|
92 |
+
```
|
93 |
+
┌─────────────────────────────────────────────────────────────┐
|
94 |
+
│ User Interface (Streamlit) │
|
95 |
+
│ Custom Chat Interface │
|
96 |
+
└─────────────────────┬───────────────────────────────────────┘
|
97 |
+
│
|
98 |
+
┌─────────────────────┴───────────────────────────────────────┐
|
99 |
+
│ LangGraph Orchestrator │
|
100 |
+
│ (ChatbotState Schema) │
|
101 |
+
│ ┌─────────────────┬─────────────────┬─────────────────┐ │
|
102 |
+
│ │ Chatbot Node │ Profile Tool │ Job Match Tool │ │
|
103 |
+
│ │ (Router) │ (Analyzer) │ (Matcher) │ │
|
104 |
+
│ │ │ │ │ │
|
105 |
+
│ │ Extract Tool │ │ │ │
|
106 |
+
│ │ (Section Data) │ │ │ │
|
107 |
+
│ └─────────────────┴─────────────────┴─────────────────┘ │
|
108 |
+
└─────────────────────┬───────────────────────────────────────┘
|
109 |
+
│
|
110 |
+
┌─────────────────────┴───────────────────────────────────────┐
|
111 |
+
│ External Services │
|
112 |
+
│ ┌─────────────────┬─────────────────┬─────────────────┐ │
|
113 |
+
│ │ Apify LinkedIn │ Groq API │ SQLite │ │
|
114 |
+
│ │ Scraper │ (llama3-8b-8192)│ Checkpointer │ │
|
115 |
+
│ └─────────────────┴─────────────────┴─────────────────┘ │
|
116 |
+
└─────────────────────────────────────────────────────────────┘
|
117 |
+
```
|
118 |
+
|
119 |
+
### **Core Components**
|
120 |
+
|
121 |
+
1. **ChatBot Node**: Main conversation router with tool calling capabilities
|
122 |
+
2. **Profile Analyzer**: Comprehensive profile evaluation for strengths and weaknesses
|
123 |
+
3. **Job Matcher**: Role compatibility analysis with scoring and suggestions
|
124 |
+
4. **Extract Tool**: Granular access to specific profile sections
|
125 |
+
5. **State Management**: Pydantic-based ChatbotState with comprehensive field tracking
|
126 |
+
6. **Thread System**: URL-based session identification and management
|
127 |
+
|
128 |
+
## 🛠️ **Installation**
|
129 |
+
|
130 |
+
### **Prerequisites**
|
131 |
+
|
132 |
+
- Python 3.8 or higher
|
133 |
+
- pip package manager
|
134 |
+
- Groq API key
|
135 |
+
- Apify API token
|
136 |
+
|
137 |
+
### **Quick Start**
|
138 |
+
|
139 |
+
1. **Clone the Repository**
|
140 |
+
```bash
|
141 |
+
git clone https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant.git
|
142 |
+
cd Linkedin-Profile-AI-Assistant
|
143 |
+
```
|
144 |
+
|
145 |
+
2. **Install Dependencies**
|
146 |
+
```bash
|
147 |
+
pip install -r requirements.txt
|
148 |
+
```
|
149 |
+
|
150 |
+
3. **Set Up Environment Variables**
|
151 |
+
```bash
|
152 |
+
cp .env.example .env
|
153 |
+
# Edit .env with your API keys
|
154 |
+
```
|
155 |
+
|
156 |
+
4. **Run the Application**
|
157 |
+
```bash
|
158 |
+
streamlit run app.py
|
159 |
+
```
|
160 |
+
|
161 |
+
5. **Access the Application**
|
162 |
+
```
|
163 |
+
Open your browser and go to: http://localhost:8501
|
164 |
+
```
|
165 |
+
|
166 |
+
### **Requirements**
|
167 |
+
|
168 |
+
```txt
|
169 |
+
streamlit>=1.28.0
|
170 |
+
langchain>=0.0.350
|
171 |
+
langchain-openai>=0.0.8
|
172 |
+
langgraph>=0.0.55
|
173 |
+
openai>=1.3.0
|
174 |
+
pydantic>=2.0.0
|
175 |
+
python-dotenv>=1.0.0
|
176 |
+
apify-client>=1.0.0
|
177 |
+
dirtyjson>=1.0.8
|
178 |
+
```
|
179 |
+
|
180 |
+
## 📖 **Usage**
|
181 |
+
|
182 |
+
### **Getting Started**
|
183 |
+
|
184 |
+
1. **Launch the Application**
|
185 |
+
- Open the application in your browser
|
186 |
+
- You'll see the main interface with a LinkedIn URL input field
|
187 |
+
|
188 |
+
2. **Enter Your LinkedIn Profile**
|
189 |
+
- Paste your LinkedIn profile URL (e.g., `https://www.linkedin.com/in/your-profile/`)
|
190 |
+
- The system will automatically scrape and analyze your profile
|
191 |
+
|
192 |
+
3. **Choose Session Mode**
|
193 |
+
- If a previous session exists, choose to continue or start fresh
|
194 |
+
- New sessions initialize with full profile preprocessing
|
195 |
+
|
196 |
+
4. **Start Chatting**
|
197 |
+
- Begin conversations with queries like:
|
198 |
+
- "Analyze my profile strengths and weaknesses"
|
199 |
+
- "I want to apply for a Data Scientist role"
|
200 |
+
- "Show me my about section"
|
201 |
+
- "What skills am I missing for a Software Engineer position?"
|
202 |
+
|
203 |
+
### **Available Commands**
|
204 |
+
|
205 |
+
- **Profile Analysis**: "Analyze my profile" - Full strengths/weaknesses analysis
|
206 |
+
- **Job Matching**: "I want to apply for [role]" - Match score and skill gaps
|
207 |
+
- **Section Access**: "Show me my [section]" - Extract specific profile sections
|
208 |
+
- **General Queries**: Ask any career-related questions for guidance
|
209 |
+
|
210 |
+
### **Sample Conversations**
|
211 |
+
|
212 |
+
```
|
213 |
+
User: "Analyze my LinkedIn profile"
|
214 |
+
AI: ✅ Profile analysis complete!
|
215 |
+
|
216 |
+
💪 Strengths
|
217 |
+
- Technical: Python, Machine Learning, Data Analysis
|
218 |
+
- Projects: E-commerce recommendation system, Stock prediction model
|
219 |
+
- Education: Computer Science degree, Data Science certification
|
220 |
+
- Soft Skills: Problem-solving, Team collaboration
|
221 |
+
|
222 |
+
⚠️ Weaknesses
|
223 |
+
- Technical Gaps: Cloud computing platforms, MLOps tools
|
224 |
+
- Project/Experience Gaps: Limited production deployment experience
|
225 |
+
- Missing Context: Quantified project impacts and metrics
|
226 |
+
|
227 |
+
🛠 Suggestions to improve
|
228 |
+
- Add AWS/Azure cloud certifications
|
229 |
+
- Include specific metrics for project outcomes
|
230 |
+
- Highlight leadership or mentoring experiences
|
231 |
+
```
|
232 |
+
|
233 |
+
```
|
234 |
+
User: "I want to apply for a Senior Data Scientist role"
|
235 |
+
AI: 📊 Job Fit Analysis
|
236 |
+
|
237 |
+
🎯 Target Role: Senior Data Scientist
|
238 |
+
Match Score: 78%
|
239 |
+
|
240 |
+
Missing Skills:
|
241 |
+
• Deep Learning frameworks (TensorFlow, PyTorch)
|
242 |
+
• MLOps and model deployment
|
243 |
+
• Leadership and team management experience
|
244 |
+
|
245 |
+
Suggestions:
|
246 |
+
• Complete online courses in deep learning
|
247 |
+
• Build projects showcasing end-to-end ML pipelines
|
248 |
+
• Seek opportunities to lead junior team members
|
249 |
+
```
|
250 |
+
|
251 |
+
## 🔧 **Technical Implementation**
|
252 |
+
|
253 |
+
### **State Management**
|
254 |
+
|
255 |
+
The application uses a sophisticated Pydantic-based state management system:
|
256 |
+
|
257 |
+
```python
|
258 |
+
class ChatbotState(BaseModel):
|
259 |
+
profile: Dict[str, Any] # Processed LinkedIn profile data
|
260 |
+
profile_url: Optional[str] # Original LinkedIn URL
|
261 |
+
sections: Dict[str, str] # Individual profile sections
|
262 |
+
enhanced_content: Dict[str, str] # Future AI-generated improvements
|
263 |
+
profile_analysis: Optional[Dict[str, Any]] # Strengths/weaknesses
|
264 |
+
job_fit: Optional[Dict[str, Any]] # Job matching results
|
265 |
+
target_role: Optional[str] # User's target job role
|
266 |
+
messages: Annotated[List[BaseMessage], add_messages] # Chat history
|
267 |
+
next_tool_name: Optional[str] # Tool routing information
|
268 |
+
```
|
269 |
+
|
270 |
+
### **Tool Integration**
|
271 |
+
|
272 |
+
The system includes three specialized tools:
|
273 |
+
|
274 |
+
1. **Profile Analyzer Tool**:
|
275 |
+
- Comprehensive profile evaluation
|
276 |
+
- Structured output with strengths, weaknesses, suggestions
|
277 |
+
- Uses ProfileAnalysisModel for validation
|
278 |
+
|
279 |
+
2. **Job Matcher Tool**:
|
280 |
+
- Role-specific compatibility analysis
|
281 |
+
- Calculates match scores (0-100%)
|
282 |
+
- Identifies missing skills and provides suggestions
|
283 |
+
|
284 |
+
3. **Extract Tool**:
|
285 |
+
- Granular access to profile sections
|
286 |
+
- Supports nested data extraction with dot notation
|
287 |
+
- Returns structured results for specific queries
|
288 |
+
|
289 |
+
### **Session Architecture**
|
290 |
+
|
291 |
+
- **Thread Management**: URL-based thread identification for session continuity
|
292 |
+
- **Checkpointing**: SQLite-based persistent storage with automatic fallback
|
293 |
+
- **State Validation**: Comprehensive Pydantic validation for data integrity
|
294 |
+
- **Memory Optimization**: Efficient message history management
|
295 |
+
|
296 |
+
### **LLM Integration**
|
297 |
+
|
298 |
+
- **Model**: Groq's llama3-8b-8192 for fast, high-quality responses
|
299 |
+
- **API**: OpenAI-compatible interface through Groq
|
300 |
+
- **Tool Calling**: Native support for structured tool invocation
|
301 |
+
- **Error Handling**: Robust retry mechanisms and graceful degradation
|
302 |
+
|
303 |
+
## 🔑 **API Keys Setup**
|
304 |
+
|
305 |
+
Create a `.env` file in the root directory:
|
306 |
+
|
307 |
+
```env
|
308 |
+
# Groq API Key (required)
|
309 |
+
GROQ_API_KEY=your_groq_api_key_here
|
310 |
+
|
311 |
+
# Apify API Token (required for LinkedIn scraping)
|
312 |
+
APIFY_API_TOKEN=your_apify_token_here
|
313 |
+
```
|
314 |
+
|
315 |
+
### **Getting API Keys**
|
316 |
+
|
317 |
+
1. **Groq API Key**:
|
318 |
+
- Visit [Groq Console](https://console.groq.com/)
|
319 |
+
- Create an account and generate an API key
|
320 |
+
- Used for llama3-8b-8192 model inference
|
321 |
+
|
322 |
+
2. **Apify API Token**:
|
323 |
+
- Go to [Apify Console](https://console.apify.com/)
|
324 |
+
- Sign up and get your API token
|
325 |
+
- Used for LinkedIn profile scraping
|
326 |
+
|
327 |
+
## 💾 **Session Management**
|
328 |
+
|
329 |
+
The application implements intelligent session management:
|
330 |
+
|
331 |
+
### **Thread-Based System**
|
332 |
+
- Each LinkedIn profile URL gets a unique thread ID
|
333 |
+
- Automatic detection of existing conversations for the same profile
|
334 |
+
- Secure isolation between different user sessions
|
335 |
+
|
336 |
+
### **Conversation Persistence**
|
337 |
+
- SQLite-based storage for production environments
|
338 |
+
- Memory-based fallback for development/testing
|
339 |
+
- Automatic checkpointing after each interaction
|
340 |
+
- Recovery capability in case of interruptions
|
341 |
+
|
342 |
+
### **User Experience**
|
343 |
+
- Choice to continue previous conversations or start fresh
|
344 |
+
- Seamless transition between sessions
|
345 |
+
- Maintained conversation context across browser refreshes
|
346 |
+
|
347 |
+
## 🤝 **Contributing**
|
348 |
+
|
349 |
+
We welcome contributions to improve the LinkedIn AI Career Assistant! Here's how you can help:
|
350 |
+
|
351 |
+
### **Development Setup**
|
352 |
+
|
353 |
+
1. Fork the repository
|
354 |
+
2. Create a feature branch: `git checkout -b feature/your-feature-name`
|
355 |
+
3. Make your changes and test thoroughly
|
356 |
+
4. Submit a pull request with a clear description
|
357 |
+
|
358 |
+
### **Areas for Contribution**
|
359 |
+
|
360 |
+
- **Tool Enhancement**: Implement the commented-out content_generator tool
|
361 |
+
- **UI/UX Improvements**: Enhance the Streamlit interface design
|
362 |
+
- **Performance Optimization**: Improve response times and resource usage
|
363 |
+
- **Testing**: Add comprehensive test coverage
|
364 |
+
- **Documentation**: Expand examples and API documentation
|
365 |
+
|
366 |
+
### **Code Style**
|
367 |
+
|
368 |
+
- Follow PEP 8 guidelines for Python code
|
369 |
+
- Use meaningful variable and function names
|
370 |
+
- Add docstrings for all functions and classes
|
371 |
+
- Include type hints where appropriate
|
372 |
+
- Validate data models with Pydantic
|
373 |
+
|
374 |
+
## 📝 **License**
|
375 |
+
|
376 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
377 |
+
|
378 |
+
## 🙏 **Acknowledgments**
|
379 |
+
|
380 |
+
- **Groq** for providing fast and efficient LLM inference
|
381 |
+
- **LangChain/LangGraph** for the multi-agent framework
|
382 |
+
- **Streamlit** for the web application framework
|
383 |
+
- **Apify** for LinkedIn scraping capabilities
|
384 |
+
- **Hugging Face** for hosting the live demo
|
385 |
+
|
386 |
+
## 📞 **Support**
|
387 |
+
|
388 |
+
For questions, issues, or suggestions:
|
389 |
+
|
390 |
+
- **Create an Issue**: [GitHub Issues](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/issues)
|
391 |
+
- **Discussions**: [GitHub Discussions](https://github.com/Sri-Vallabh/Linkedin-Profile-AI-Assistant/discussions)
|
392 |
+
- **Email**: [email protected]
|
393 |
+
|
394 |
+
## 🔄 **Recent Updates**
|
395 |
+
|
396 |
+
- **v2.0**: Migrated to Groq API for faster inference
|
397 |
+
- **Thread Management**: Implemented URL-based session tracking
|
398 |
+
- **Enhanced UI**: Custom chat interface with professional styling
|
399 |
+
- **Robust State**: Pydantic-based data validation and error handling
|
400 |
+
- **Tool Optimization**: Streamlined to three core analysis tools
|
401 |
+
|
402 |
+
---
|
403 |
+
|
404 |
+
**Built with ❤️ by Sri Vallabh**
|
405 |
+
|
406 |
+
*Empowering professionals to optimize their LinkedIn presence and advance their careers through AI-powered insights.*
|
app.py
ADDED
@@ -0,0 +1,643 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import re
|
4 |
+
import time
|
5 |
+
from typing import Dict, Any, List, Optional, Annotated
|
6 |
+
from chatbot_model import (
|
7 |
+
UserMemory,
|
8 |
+
ChatbotState,
|
9 |
+
ProfileAnalysisModel,
|
10 |
+
JobFitModel,
|
11 |
+
ContentGenerationModel,
|
12 |
+
|
13 |
+
)
|
14 |
+
from llm_utils import call_llm_and_parse
|
15 |
+
from profile_preprocessing import (
|
16 |
+
preprocess_profile,
|
17 |
+
initialize_state,
|
18 |
+
normalize_url
|
19 |
+
)
|
20 |
+
from openai import OpenAI
|
21 |
+
import streamlit as st
|
22 |
+
import hashlib
|
23 |
+
from dotenv import load_dotenv
|
24 |
+
from pydantic import BaseModel, Field,ValidationError
|
25 |
+
# import pdb; pdb.set_trace()
|
26 |
+
from scraping_profile import scrape_linkedin_profile
|
27 |
+
from langchain_openai import ChatOpenAI
|
28 |
+
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage,BaseMessage,ToolMessage
|
29 |
+
from langchain_core.tools import tool
|
30 |
+
from langgraph.graph import StateGraph, END,START
|
31 |
+
from langgraph.checkpoint.memory import MemorySaver
|
32 |
+
from langgraph.graph import add_messages # if your framework exposes this
|
33 |
+
from langgraph.prebuilt import ToolNode,tools_condition,InjectedState
|
34 |
+
import dirtyjson
|
35 |
+
import sqlite3
|
36 |
+
try:
|
37 |
+
from langgraph.checkpoint.sqlite import SqliteSaver
|
38 |
+
SQLITE_AVAILABLE = True
|
39 |
+
except ImportError:
|
40 |
+
SQLITE_AVAILABLE = False
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
# ========== 1. ENVIRONMENT & LLM SETUP ==========
# Load API credentials from a local .env file into the process environment.
load_dotenv()
groq_key = os.getenv("GROQ_API_KEY")
# NOTE(review): `assert` is stripped when Python runs with -O; consider raising
# RuntimeError for this required-configuration check instead.
assert groq_key, "GROQ_API_KEY not found in environment!"
# OpenAI-compatible client pointed at Groq's endpoint (Groq exposes an
# OpenAI-style REST API, so the stock OpenAI SDK client works unchanged).
groq_client=OpenAI(
    api_key=os.getenv("GROQ_API_KEY"),
    base_url="https://api.groq.com/openai/v1"
)
|
52 |
+
|
53 |
+
def normalize_url(url):
    """Return *url* trimmed of surrounding whitespace and all trailing slashes.

    NOTE(review): this shadows the `normalize_url` imported from
    `profile_preprocessing` earlier in the file — confirm which one is intended.
    """
    trimmed = url.strip()
    return trimmed.rstrip('/')
|
55 |
+
|
56 |
+
def validate_state(state: dict) -> None:
    """
    Validate given state dict against ChatbotState schema.
    Displays result in Streamlit instead of printing.
    """
    try:
        ChatbotState.model_validate(state)
    except ValidationError as exc:
        # Surface every individual schema violation in the UI, then halt the app run.
        st.error("❌ Validation failed!")
        formatted = [
            f"- At: {' → '.join(str(part) for part in err['loc'])}\n  Error: {err['msg']}"
            for err in exc.errors()
        ]
        st.write("\n".join(formatted))
        # Raw validation error available on demand for debugging.
        st.expander("See raw validation error").write(str(exc))
        st.stop()
|
76 |
+
|
77 |
+
|
78 |
+
# Module-level memory store for user data, shared across this Streamlit process.
user_memory = UserMemory()

# ========== 7. AGENT FUNCTIONS ==========
|
81 |
+
|
82 |
+
def profile_analysis_prompt(profile: Dict[str, str]) -> str:
    """Build the LLM prompt for full-profile strengths/weaknesses analysis.

    Args:
        profile: Preprocessed LinkedIn profile data; values are read with
            ``.get`` so missing sections render as empty strings.

    Returns:
        A stripped prompt string instructing the model to reply with JSON
        containing ``strengths``, ``weaknesses``, and ``suggestions`` keys
        (matching what the profile-analyzer tool expects to parse).
    """
    # The doubled braces ({{ }}) render as literal braces in the f-string so
    # the example JSON schema survives formatting.
    return f"""
You are a top-tier LinkedIn career coach and AI analyst.

Analyze the following candidate profile carefully.

Candidate profile data:
FullName: {profile.get("FullName", "")}
Headline: {profile.get("Headline", "")}
JobTitle: {profile.get("JobTitle", "")}
CompanyName: {profile.get("CompanyName", "")}
CompanyIndustry: {profile.get("CompanyIndustry", "")}
CurrentJobDuration: {profile.get("CurrentJobDuration", "")}
About: {profile.get("About", "")}
Experiences: {profile.get("Experiences", "")}
Skills: {profile.get("Skills", "")}
Educations: {profile.get("Educations", "")}
Certifications: {profile.get("Certifications", "")}
HonorsAndAwards: {profile.get("HonorsAndAwards", "")}
Verifications: {profile.get("Verifications", "")}
Highlights: {profile.get("Highlights", "")}
Projects: {profile.get("Projects", "")}
Publications: {profile.get("Publications", "")}
Patents: {profile.get("Patents", "")}
Courses: {profile.get("Courses", "")}
TestScores: {profile.get("TestScores", "")}


Identify and summarize:
1. strengths:
   - technical strengths (skills, tools, frameworks)
   - project strengths (impactful projects, innovation)
   - educational strengths (degrees, certifications, awards)
   - soft skills and personality traits (teamwork, leadership)
2. weaknesses:
   - missing or weak technical skills
   - gaps in projects, experience, or education
   - unclear profile sections or missing context
3. actionable suggestions:
   - concrete ways to improve profile headline, about section, or add projects
   - suggestions to learn or highlight new skills
   - ideas to make the profile more attractive for recruiters

Important instructions:
- Respond ONLY with valid JSON.
- Do NOT include text before or after JSON.
- Be concise but detailed.


Example JSON format:
{{
  "strengths": {{
    "technical": ["...", "..."],
    "projects": ["...", "..."],
    "education": ["...", "..."],
    "soft_skills": ["...", "..."]
  }},
  "weaknesses": {{
    "technical_gaps": ["...", "..."],
    "project_or_experience_gaps": ["...", "..."],
    "missing_context": ["...", "..."]
  }},
  "suggestions": [
    "...",
    "...",
    "..."
  ]
}}
""".strip()
|
152 |
+
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
def job_fit_prompt(sections: Dict[str, str], target_role: str) -> str:
    """Build the LLM prompt that scores how well the profile fits *target_role*.

    Args:
        sections: Flattened profile sections (headline, about, skills, ...).
        target_role: Job title to compare the profile against.

    Returns:
        A stripped prompt string instructing the model to reply with JSON
        matching the JobFitModel schema (match_score / missing_skills /
        suggestions).
    """
    prompt = f"""
You are an expert career coach and recruiter.

Compare the following candidate profile against the typical requirements for the role of "{target_role}".

Candidate Profile:
- Headline: {sections.get('headline', '')}
- About: {sections.get('about', '')}
- Job Title: {sections.get('job_title', '')}
- Company: {sections.get('company_name', '')}
- Industry: {sections.get('company_industry', '')}
- Current Job Duration: {sections.get('current_job_duration', '')}
- Skills: {sections.get('skills', '')}
- Projects: {sections.get('projects', '')}
- Educations: {sections.get('educations', '')}
- Certifications: {sections.get('certifications', '')}
- Honors & Awards: {sections.get('honors_and_awards', '')}
- Experiences: {sections.get('experiences', '')}

**Instructions:**
- Respond ONLY with valid JSON.
- Your JSON must exactly match the following schema:
{{
    "match_score": 85,
    "missing_skills": ["Skill1", "Skill2"],
    "suggestions": ["...", "...", "..."]
}}
- "match_score": integer from 0–100 estimating how well the profile fits the target role.
- "missing_skills": key missing or weakly mentioned skills.
- "suggestions": 3 actionable recommendations to improve fit (e.g., learn tools, rewrite headline).

Do NOT include explanations, text outside JSON, or markdown.
Start with '{{' and end with '}}'.
The JSON must be directly parseable.
"""
    return prompt.strip()
|
192 |
+
|
193 |
+
|
194 |
+
# --- Tool: Profile Analyzer ---
@tool
def profile_analyzer(state: Annotated[ChatbotState, InjectedState]) -> dict:
    """
    Tool: Analyze the overall full user's profile to give strengths, weaknesses, suggestions.
    This is needed only if full analysis of profile is needed.
    Returns the full analysis in the form of a json.

    - It takes no arguments
    """
    # Summarized profile is a dict of plain strings built at state-init time.
    profile = getattr(state, "profile", {}) or {}

    # Build prompt
    prompt = profile_analysis_prompt(profile)

    # call_llm_and_parse returns a validated ProfileAnalysisModel on success,
    # but a plain {"error": ..., "raw": ...} dict after exhausting retries —
    # calling .model_dump() on that dict would raise AttributeError.
    result = call_llm_and_parse(groq_client, prompt, ProfileAnalysisModel)
    if isinstance(result, dict):
        # Schema-shaped fallback, mirroring job_matcher's failure handling.
        analysis_dict = {
            "strengths": {"technical": [], "projects": [], "education": [], "soft_skills": []},
            "weaknesses": {"technical_gaps": [], "project_or_experience_gaps": [], "missing_context": []},
            "suggestions": ["Parsing failed or incomplete response."],
        }
    else:
        analysis_dict = result.model_dump()

    # Persist for later turns: state drives the UI, user_memory keeps history.
    state.profile_analysis = analysis_dict
    user_memory.save("profile_analysis", analysis_dict)

    print("💾 [DEBUG] Saved analysis to user memory.")
    print("📦 [DEBUG] Updated state.profile_analysis with analysis.")

    return analysis_dict
|
224 |
+
|
225 |
+
# --- Tool: Job Matcher ---


@tool
def job_matcher(
    state: Annotated[ChatbotState, InjectedState],
    target_role: str = None
) -> dict:
    """
    Tool: Analyze how well the user's profile fits the target role.
    - If user is asking if he is a good fit for a certain role, or needs to see if his profile is compatible with a certain role, call this.
    - Takes target_role as an argument.
    - this tool is needed when match score, missing skills, suggestions are needed based on a job name given.
    """
    print(f"target role is {target_role}")
    # Update state.target_role if provided (previously only commented, never done),
    # so later turns can reuse the requested role without re-asking.
    if target_role:
        state.target_role = target_role

    sections = getattr(state, "sections", {})

    # Build prompt
    prompt = job_fit_prompt(sections, target_role)

    # Call LLM and parse; a failed parse (including call_llm_and_parse returning
    # its error dict, whose .model_dump attribute is missing) lands in the fallback.
    try:
        job_fit_model = call_llm_and_parse(groq_client, prompt, JobFitModel)
        job_fit_dict = job_fit_model.model_dump()
        job_fit_dict["target_role"] = target_role
    except Exception as e:
        # Log instead of silently swallowing, then return a schema-shaped
        # result so the UI renders cleanly.
        print(f"[job_matcher] parse failed: {e}")
        job_fit_dict = {
            "target_role": target_role,
            "match_score": 0,
            "missing_skills": [],
            "suggestions": ["Parsing failed or incomplete response."]
        }

    # Save to state and user memory
    state.job_fit = job_fit_dict
    user_memory.save("job_fit", job_fit_dict)

    return job_fit_dict
|
265 |
+
|
266 |
+
|
267 |
+
|
268 |
+
|
269 |
+
|
270 |
+
|
271 |
+
@tool
def extract_from_state_tool(
    state: Annotated[ChatbotState, InjectedState],
    key: str
) -> dict:
    """
    This tool is used if the user wants to ask about any particular part of his profile. Use this if a single section is targeted. It expects key as an argument, that represents what
    the user is wanting to look at, from his profile.
    Argument:
    key: only pass one from the below list, identify one thing the user wants to look into and choose that:
    "sections.about", "sections.headline", "sections.skills", "sections.projects",
    "sections.educations", "sections.certifications", "sections.honors_and_awards",
    "sections.experiences", "sections.publications", "sections.patents",
    "sections.courses", "sections.test_scores", "sections.verifications",
    "sections.highlights", "sections.job_title", "sections.company_name",
    "sections.company_industry", "sections.current_job_duration", "sections.full_name",
    "enhanced_content", "profile_analysis", "job_fit", "target_role", "editing_section"
    """
    # Walk the dotted path, supporting both plain dicts and Pydantic models.
    value = state
    try:
        for part in key.split('.'):
            if isinstance(value, dict):
                value = value.get(part)
            elif hasattr(value, part):
                value = getattr(value, part)
            else:
                value = None
            if value is None:
                break
    except Exception:
        # Any lookup error degrades to "not found" rather than crashing the agent.
        value = None
    return {"result": value}
|
304 |
+
|
305 |
+
|
306 |
+
# Tools exposed to the agent; the LLM selects among these via function-calling.
tools = [
    profile_analyzer,
    job_matcher,
    extract_from_state_tool
]
# Groq's OpenAI-compatible endpoint, driven through the ChatOpenAI client.
# temperature=0 keeps tool-routing decisions deterministic.
llm = ChatOpenAI(
    api_key=groq_key,
    base_url="https://api.groq.com/openai/v1",
    model="llama3-8b-8192",
    temperature=0
)
# Bound copy that advertises the tool schemas to the model.
llm_with_tools = llm.bind_tools(tools)
|
318 |
+
|
319 |
+
|
320 |
+
|
321 |
+
# ========== 8. LANGGRAPH PIPELINE ==========
|
322 |
+
|
323 |
+
|
324 |
+
def chatbot_node(state: ChatbotState) -> ChatbotState:
    """LangGraph node: run the tool-enabled LLM over the recent chat history.

    Appends the model's reply (possibly carrying a tool call) to
    state.messages and returns the mutated state.
    """
    validate_state(state)

    messages = state.get("messages", [])

    system_prompt = """
You are a helpful AI assistant specialized in LinkedIn profile coaching.

You can:
- Answer user questions.
- If user is greeting , greet him back also telling how you can help him.
- You should proactively use specialized tools whenever possible to give richer, data-driven answers.
IMPORTANT RULES:
- You must call at most one tool at a time.
- Never call multiple tools together in the same step.
- If user asks to show any section, use extract_from_state_tool, and after that, show the exact result from it.
- If information about that section is already known, dont call extract_from_state_tool, directly answer the user query.
- call profile_analyzer function only when full profile analysis is needed, otherwise rely on extract_from_state_tool.
- If user asks to enhance any section, check if it is there in history, otherwise call extract_from_state_tool first.
- Prefer to call a tool when answering instead of directly replying, especially if it can add new, useful insights or up-to-date data.
- If a tool has been recently used and new info isn’t needed, you may answer directly.
- Use tools to verify assumptions, enrich answers, or when the user asks about strengths, weaknesses, job fit, or wants improvements.

Always respond helpfully, clearly, and with actionable advice to guide the user in improving their LinkedIn profile.
"""

    # Keep the prompt small: system message plus only the last two turns.
    messages = [SystemMessage(content=system_prompt)] + messages[-2:]
    response = llm_with_tools.invoke(messages)
    if hasattr(response, "tool_calls") and response.tool_calls:
        first_tool = response.tool_calls[0]
        # Tool calls may arrive as dicts or objects depending on client version.
        # (Removed dead `tool_args` extraction — it was computed but never used.)
        tool_name = first_tool.get("name") if isinstance(first_tool, dict) else getattr(first_tool, "name", None)
        print(f"[DEBBBBUUUUGGG] using tool {tool_name}")

    # DEBUG
    print("[DEBUG] LLM response:", response)
    state.setdefault("messages", []).append(response)

    return state
|
365 |
+
|
366 |
+
|
367 |
+
|
368 |
+
|
369 |
+
|
370 |
+
# --- Graph definition ---
# Standard LangGraph ReAct wiring: chatbot -> (conditional) tools -> chatbot loop.
graph = StateGraph(state_schema=ChatbotState)
graph.add_node("chatbot", chatbot_node)
graph.add_node("tools", ToolNode(tools))
graph.add_edge(START, "chatbot")
# Route to "tools" when the LLM emitted a tool call, otherwise finish the turn.
graph.add_conditional_edges("chatbot", tools_condition)
graph.add_edge("tools","chatbot")
# NOTE(review): redundant with add_edge(START, "chatbot") above — harmless duplication.
graph.set_entry_point("chatbot")
|
378 |
+
|
379 |
+
# --- Streamlit UI ---
|
380 |
+
st.set_page_config(page_title="💼 LinkedIn AI Career Assistant", page_icon="🤖", layout="wide")
|
381 |
+
st.title("🧑💼 LinkedIn AI Career Assistant")
|
382 |
+
|
383 |
+
# --- Checkpointer and graph initialization ---
|
384 |
+
if "checkpointer" not in st.session_state:
|
385 |
+
if SQLITE_AVAILABLE:
|
386 |
+
conn = sqlite3.connect("checkpoints1.db", check_same_thread=False)
|
387 |
+
st.session_state["checkpointer"] = SqliteSaver(conn)
|
388 |
+
else:
|
389 |
+
st.session_state["checkpointer"] = MemorySaver()
|
390 |
+
checkpointer = st.session_state["checkpointer"]
|
391 |
+
|
392 |
+
if "app_graph" not in st.session_state:
|
393 |
+
st.session_state["app_graph"] = graph.compile(checkpointer=checkpointer)
|
394 |
+
app_graph = st.session_state["app_graph"]
|
395 |
+
# Find or create thread
def find_thread_id_for_url(checkpointer, url, max_threads=100):
    """Scan thread ids 0..max_threads-1 for a checkpoint whose stored
    profile_url matches *url*.

    Returns:
        (thread_id, channel_values) on a match, else (None, None).
    """
    wanted = normalize_url(url)
    for tid in range(max_threads):
        cfg = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}}
        snapshot = checkpointer.get(cfg)
        if not snapshot or "channel_values" not in snapshot:
            continue
        channel_values = snapshot["channel_values"]
        if normalize_url(channel_values.get("profile_url", "")) == wanted:
            return str(tid), channel_values
    return None, None
|
407 |
+
|
408 |
+
def delete_thread_checkpoint(checkpointer, thread_id):
    """Remove all checkpoints for *thread_id* when the backend supports it.

    SqliteSaver exposes delete_thread; other checkpointers are left untouched
    (no-op, matching the original best-effort behavior).
    """
    delete = getattr(checkpointer, "delete_thread", None)
    if callable(delete):
        delete(thread_id)
|
415 |
+
|
416 |
+
|
417 |
+
def get_next_thread_id(checkpointer, max_threads=100):
    """Return the smallest unused thread id in [0, max_threads) as a string.

    A slot counts as used when the checkpointer holds any checkpoint for it.

    Raises:
        RuntimeError: when every slot already holds a checkpoint.
    """
    for tid in range(max_threads):
        cfg = {"configurable": {"thread_id": str(tid), "checkpoint_ns": ""}}
        if not checkpointer.get(cfg):
            return str(tid)
    raise RuntimeError("No available thread_id")
|
427 |
+
|
428 |
+
# --- Session selection and state initialization ---
|
429 |
+
|
430 |
+
if "chat_mode" not in st.session_state:
|
431 |
+
profile_url = st.text_input("Profile URL (e.g., https://www.linkedin.com/in/username/)")
|
432 |
+
if not profile_url:
|
433 |
+
st.info("Please enter a valid LinkedIn profile URL above to start.")
|
434 |
+
st.stop()
|
435 |
+
|
436 |
+
valid_pattern = r"^https://www\.linkedin\.com/in/[^/]+/?$"
|
437 |
+
if not re.match(valid_pattern, profile_url.strip()):
|
438 |
+
st.error("❌ Invalid LinkedIn profile URL. Make sure it matches the format.")
|
439 |
+
st.stop()
|
440 |
+
url = profile_url.strip()
|
441 |
+
|
442 |
+
existing_thread_id, previous_state = find_thread_id_for_url(checkpointer, url)
|
443 |
+
# Defensive: ensure required fields
|
444 |
+
required_fields = ["profile", "sections"]
|
445 |
+
if previous_state and not all(f in previous_state and previous_state[f] for f in required_fields):
|
446 |
+
st.warning("Previous session is missing required data. Please start a new chat.")
|
447 |
+
previous_state = None
|
448 |
+
|
449 |
+
if previous_state:
|
450 |
+
st.info("A previous session found. Choose:")
|
451 |
+
col1, col2 = st.columns(2)
|
452 |
+
if col1.button("Continue previous chat"):
|
453 |
+
st.session_state["chat_mode"] = "continue"
|
454 |
+
st.session_state["thread_id"] = existing_thread_id
|
455 |
+
st.session_state.state = previous_state
|
456 |
+
st.rerun()
|
457 |
+
elif col2.button("Start new chat"):
|
458 |
+
delete_thread_checkpoint(checkpointer, existing_thread_id)
|
459 |
+
with st.spinner("Fetching and processing profile... ⏳"):
|
460 |
+
raw=scrape_linkedin_profile(url)
|
461 |
+
thread_id = existing_thread_id
|
462 |
+
st.session_state["chat_mode"] = "new"
|
463 |
+
st.session_state["thread_id"] = thread_id
|
464 |
+
st.session_state.state = initialize_state(raw)
|
465 |
+
st.session_state.state["profile_url"] = normalize_url(url)
|
466 |
+
st.session_state.state["messages"] = []
|
467 |
+
st.rerun()
|
468 |
+
st.stop()
|
469 |
+
else:
|
470 |
+
with st.spinner("Fetching and processing profile... ⏳"):
|
471 |
+
raw=scrape_linkedin_profile(url)
|
472 |
+
thread_id = get_next_thread_id(checkpointer)
|
473 |
+
st.session_state["thread_id"] = thread_id
|
474 |
+
st.session_state["chat_mode"] = "new"
|
475 |
+
st.session_state.state = initialize_state(raw)
|
476 |
+
st.session_state.state["profile_url"] = normalize_url(url)
|
477 |
+
st.session_state.state["messages"] = []
|
478 |
+
st.rerun()
|
479 |
+
|
480 |
+
# --- Main chat UI (only after chat_mode is set) ---
|
481 |
+
state = st.session_state.state
|
482 |
+
thread_id = st.session_state.get("thread_id")
|
483 |
+
|
484 |
+
st.subheader("💬 Chat with your AI Assistant")
|
485 |
+
messages = state.get("messages", [])
|
486 |
+
chat_container = st.container()
|
487 |
+
|
488 |
+
with chat_container:
|
489 |
+
st.markdown(
|
490 |
+
"""
|
491 |
+
<style>
|
492 |
+
.chat-row { display: flex; width: 100%; margin-bottom: 12px; animation: fadeIn 0.5s; }
|
493 |
+
.chat-row.user { justify-content: flex-end; }
|
494 |
+
.chat-row.ai { justify-content: flex-start; }
|
495 |
+
.chat-bubble { font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif; font-size: 1.08rem; line-height: 1.65; padding: 14px 22px; border-radius: 20px; min-width: 60px; max-width: 75vw; box-shadow: 0 2px 12px rgba(0,0,0,0.10); word-break: break-word; display: inline-block; position: relative; margin-bottom: 2px; }
|
496 |
+
.bubble-user { background: linear-gradient(90deg, #43e97b 0%, #38f9d7 100%); color: #fff; border-bottom-right-radius: 6px; border-top-right-radius: 22px; text-align: right; box-shadow: 0 4px 16px rgba(67,233,123,0.13); }
|
497 |
+
.bubble-ai { background: linear-gradient(90deg, #e3f0ff 0%, #c9eaff 100%); color: #1a237e; border-bottom-left-radius: 6px; border-top-left-radius: 22px; text-align: left; border: 1.5px solid #b3e0fc; box-shadow: 0 4px 16px rgba(44, 62, 80, 0.08); }
|
498 |
+
.bubble-unknown { background: #fffbe6; color: #8a6d3b; border-radius: 14px; text-align: center; border: 1px solid #ffe082; display: inline-block; }
|
499 |
+
.sender-label { font-size: 0.93em; font-weight: 600; opacity: 0.7; margin-bottom: 4px; display: block; }
|
500 |
+
.avatar { width: 38px; height: 38px; border-radius: 50%; margin-right: 10px; margin-top: 2px; background: #e0e0e0; object-fit: cover; box-shadow: 0 2px 6px rgba(0,0,0,0.07); }
|
501 |
+
@keyframes fadeIn { from { opacity: 0; transform: translateY(12px);} to { opacity: 1; transform: translateY(0);} }
|
502 |
+
</style>
|
503 |
+
""",
|
504 |
+
unsafe_allow_html=True,
|
505 |
+
)
|
506 |
+
|
507 |
+
job_fit = state.get("job_fit")
|
508 |
+
for msg in messages:
|
509 |
+
if isinstance(msg, HumanMessage):
|
510 |
+
st.markdown(
|
511 |
+
f"""
|
512 |
+
<div class="chat-row user">
|
513 |
+
<div class="chat-bubble bubble-user">
|
514 |
+
<span class="sender-label">🧑💻 You</span>
|
515 |
+
{msg.content}
|
516 |
+
</div>
|
517 |
+
</div>
|
518 |
+
""",
|
519 |
+
unsafe_allow_html=True,
|
520 |
+
)
|
521 |
+
elif isinstance(msg, AIMessage):
|
522 |
+
if not msg.content or not msg.content.strip():
|
523 |
+
continue
|
524 |
+
st.markdown(
|
525 |
+
f"""
|
526 |
+
<div class="chat-row ai">
|
527 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="AI"/>
|
528 |
+
<div class="chat-bubble bubble-ai">
|
529 |
+
<span class="sender-label">🤖 AI</span>
|
530 |
+
{msg.content}
|
531 |
+
</div>
|
532 |
+
</div>
|
533 |
+
""",
|
534 |
+
unsafe_allow_html=True,
|
535 |
+
)
|
536 |
+
elif isinstance(msg, ToolMessage):
|
537 |
+
raw_content = msg.content or "(no content)"
|
538 |
+
try:
|
539 |
+
parsed = json.loads(raw_content)
|
540 |
+
except Exception:
|
541 |
+
parsed = None
|
542 |
+
|
543 |
+
if parsed and isinstance(parsed, dict):
|
544 |
+
# --- Profile analysis format ---
|
545 |
+
if all(k in parsed for k in ("strengths", "weaknesses", "suggestions")):
|
546 |
+
strengths = parsed["strengths"]
|
547 |
+
weaknesses = parsed["weaknesses"]
|
548 |
+
suggestions = parsed["suggestions"]
|
549 |
+
|
550 |
+
formatted = (
|
551 |
+
"### 💪 **Strengths**\n"
|
552 |
+
f"- **Technical:** {', '.join(strengths.get('technical', []) or ['None'])}\n"
|
553 |
+
f"- **Projects:** {', '.join(strengths.get('projects', []) or ['None'])}\n"
|
554 |
+
f"- **Education:** {', '.join(strengths.get('education', []) or ['None'])}\n"
|
555 |
+
f"- **Soft Skills:** {', '.join(strengths.get('soft_skills', []) or ['None'])}\n\n"
|
556 |
+
"### ⚠️ **Weaknesses**\n"
|
557 |
+
f"- **Technical Gaps:** {', '.join(weaknesses.get('technical_gaps', []) or ['None'])}\n"
|
558 |
+
f"- **Project/Experience Gaps:** {', '.join(weaknesses.get('project_or_experience_gaps', []) or ['None'])}\n"
|
559 |
+
f"- **Missing Context:** {', '.join(weaknesses.get('missing_context', []) or ['None'])}\n\n"
|
560 |
+
"### 🛠 **Suggestions to improve**\n"
|
561 |
+
+ "\n".join(f"- {s}" for s in suggestions)
|
562 |
+
)
|
563 |
+
|
564 |
+
st.markdown(f"""
|
565 |
+
<div class="chat-row ai">
|
566 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
567 |
+
<div class="chat-bubble bubble-ai">
|
568 |
+
<span class="sender-label">📊 Profile Analysis</span>
|
569 |
+
{formatted}
|
570 |
+
</div>
|
571 |
+
</div>
|
572 |
+
""", unsafe_allow_html=True)
|
573 |
+
|
574 |
+
# --- Job fit format ---
|
575 |
+
elif "match_score" in parsed:
|
576 |
+
percent = parsed["match_score"]
|
577 |
+
suggestions = parsed.get("suggestions", [])
|
578 |
+
missing = parsed.get("missing_skills", [])
|
579 |
+
target_role = parsed.get('target_role', 'unspecified')
|
580 |
+
state["target_role"]=target_role
|
581 |
+
suggestions_html = "<br>".join(f"• {s}" for s in suggestions)
|
582 |
+
missing_html = "<br>".join(f"• {s}" for s in missing)
|
583 |
+
|
584 |
+
st.markdown(f"""
|
585 |
+
<div class="chat-row ai">
|
586 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
587 |
+
<div class="chat-bubble bubble-ai">
|
588 |
+
<span class="sender-label">📊 Job Fit</span>
|
589 |
+
<b>🎯 Target Role:</b> {target_role}<br>
|
590 |
+
<div style="
|
591 |
+
width: 120px; height: 120px; border-radius: 50%;
|
592 |
+
background: conic-gradient(#25D366 {percent * 3.6}deg, #e0e0e0 0deg);
|
593 |
+
display: flex; align-items: center; justify-content: center;
|
594 |
+
font-size: 1.8rem; color: #333; margin: 10px auto;">
|
595 |
+
{percent}%
|
596 |
+
</div>
|
597 |
+
<b>Missing Skills:</b><br>{missing_html}<br><br>
|
598 |
+
<b>Suggestions:</b><br>{suggestions_html}
|
599 |
+
</div>
|
600 |
+
</div>
|
601 |
+
""", unsafe_allow_html=True)
|
602 |
+
|
603 |
+
# --- Section text format ---
|
604 |
+
elif "result" in parsed:
|
605 |
+
text = parsed["result"]
|
606 |
+
st.markdown(f"""
|
607 |
+
<div class="chat-row ai">
|
608 |
+
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
|
609 |
+
<div class="chat-bubble bubble-ai">
|
610 |
+
<span class="sender-label">📄 Section Content</span>
|
611 |
+
{text}
|
612 |
+
</div>
|
613 |
+
</div>
|
614 |
+
""", unsafe_allow_html=True)
|
615 |
+
|
616 |
+
else:
|
617 |
+
st.markdown(
|
618 |
+
f"""
|
619 |
+
<div class="chat-row">
|
620 |
+
<div class="chat-bubble bubble-unknown">
|
621 |
+
<span class="sender-label">⚠️ Unknown</span>
|
622 |
+
{getattr(msg, 'content', str(msg))}
|
623 |
+
</div>
|
624 |
+
</div>
|
625 |
+
""",
|
626 |
+
unsafe_allow_html=True,
|
627 |
+
)
|
628 |
+
st.markdown('<div style="clear:both"></div>', unsafe_allow_html=True)
|
629 |
+
|
630 |
+
st.markdown("---")
|
631 |
+
|
632 |
+
user_input = st.chat_input(
|
633 |
+
placeholder="Ask about your LinkedIn profile, e.g., 'Analyze my profile, how do I fit for AI role, how is my about section?'"
|
634 |
+
)
|
635 |
+
|
636 |
+
if user_input and user_input.strip():
|
637 |
+
state.setdefault("messages", []).append(HumanMessage(content=user_input.strip()))
|
638 |
+
validate_state(state)
|
639 |
+
thread_id = st.session_state.get("thread_id")
|
640 |
+
config = {"configurable": {"thread_id": thread_id}}
|
641 |
+
with st.spinner("Processing your request..."):
|
642 |
+
st.session_state.state = app_graph.invoke(state, config)
|
643 |
+
st.rerun()
|
chatbot_model.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict, Any, Optional, Annotated
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
from langchain_core.messages import BaseMessage
|
4 |
+
from langgraph.graph import add_messages
|
5 |
+
class ChatbotState(BaseModel):
    """Conversation state threaded through the LangGraph pipeline.

    Exposes dict-like .get()/.setdefault() so graph code can treat this
    Pydantic model and a plain dict interchangeably.
    """

    def get(self, key, default=None):
        """
        Allow dict-like .get() access for compatibility.
        """
        # First try attribute directly
        if hasattr(self, key):
            return getattr(self, key)
        # Fallback: check if it's in __dict__
        return self.__dict__.get(key, default)

    def setdefault(self, key, default):
        """
        Dict-like setdefault: if attribute is None, set it to default and return it.
        Otherwise, return existing value.
        """
        if hasattr(self, key):
            value = getattr(self, key)
            if value is None:
                setattr(self, key, default)
                return default
            return value
        else:
            # attribute does not exist: set it
            # NOTE(review): on Pydantic v2, setattr of an undeclared field raises
            # unless the model config permits extra attributes — confirm config.
            setattr(self, key, default)
            return default

    # Preprocessed/summarized profile data (required at construction).
    profile: Dict[str, Any] = Field(..., description="Preprocessed / summarized profile data")
    profile_url: Optional[str] = Field(
        default=None,
        description="Original LinkedIn profile URL provided by the user."
    )

    # Quick access sections (about, headline, skills etc.)
    sections: Dict[str, str] = Field(..., description="Flattened profile sections for quick access")

    # Enhancements and analysis results
    enhanced_content: Dict[str, str] = Field(
        default_factory=dict,
        description=(
            "Map of improved or rewritten profile sections generated by the ContentGenerator tool. "
            "Keys are section names (e.g., 'about', 'headline'); values are enhanced text."
        )
    )

    profile_analysis: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Structured analysis of the user's profile produced by the ProfileAnalyzer tool, "
            "including strengths, weaknesses, and actionable suggestions."
        )
    )

    job_fit: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Assessment result from the JobMatcher tool, detailing how well the user's profile matches "
            "the target role, including missing skills and match score."
        )
    )

    target_role: Optional[str] = Field(
        None,
        description=(
            "Target job role the user is aiming for. "
            "Can be set by the user directly during the conversation or inferred by the chatbot."
        )
    )

    editing_section: Optional[str] = Field(
        None,
        description=(
            "Name of the profile section currently being edited or improved, "
            "set dynamically when the ContentGenerator tool is invoked."
        )
    )
    next_tool_name: Optional[str] = Field(
        default=None,
        description="Name of the next tool the chatbot wants to call, set dynamically after LLM response."
    )

    # Annotated chat history directly using BaseMessage; add_messages lets
    # LangGraph merge/append message updates instead of overwriting.
    messages: Annotated[List[BaseMessage], add_messages] = Field(
        default_factory=list,
        description="List of user and assistant messages"
    )
|
90 |
+
|
91 |
+
|
92 |
+
|
93 |
+
class ProfileAnalysisStrengths(BaseModel):
    """Strength buckets returned by the profile-analysis LLM call."""
    technical: List[str]
    projects: List[str]
    education: List[str]
    soft_skills: List[str]
|
98 |
+
|
99 |
+
class ProfileAnalysisWeaknesses(BaseModel):
    """Weakness buckets returned by the profile-analysis LLM call."""
    technical_gaps: List[str]
    project_or_experience_gaps: List[str]
    missing_context: List[str]
|
103 |
+
|
104 |
+
class ProfileAnalysisModel(BaseModel):
    """Top-level schema the profile_analyzer tool validates LLM JSON against."""
    strengths: ProfileAnalysisStrengths
    weaknesses: ProfileAnalysisWeaknesses
    suggestions: List[str]
|
108 |
+
|
109 |
+
class JobFitModel(BaseModel):
    """Schema the job_matcher tool validates LLM JSON against."""
    # Fit estimate constrained to a 0–100 percentage.
    match_score: int = Field(..., ge=0, le=100)
    missing_skills: List[str]
    suggestions: List[str]
|
113 |
+
|
114 |
+
class ContentGenerationModel(BaseModel):
    """Schema for rewritten-section output from the content-generation flow."""
    new_content: str
|
116 |
+
|
117 |
+
|
118 |
+
# ========== 6. MEMORY SETUP ==========

class UserMemory:
    """Simple append-only, in-process store of (key, value) events."""

    def __init__(self):
        self.profile = None
        self.target_roles = []
        self.history = []

    def save(self, key, value):
        """Record one (key, value) pair, preserving insertion order."""
        self.history.append((key, value))

    def get_history(self):
        """Return every recorded (key, value) pair, oldest first."""
        return self.history
|
llm_utils.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
from typing import Type, Union, Dict, Any
|
3 |
+
from pydantic import BaseModel
|
4 |
+
import dirtyjson
|
5 |
+
import re
|
6 |
+
# Make sure you install dirtyjson: pip install dirtyjson
|
7 |
+
|
8 |
+
# === Optionally, import your Groq client from where you configure it ===
|
9 |
+
|
10 |
+
# === Helper function ===
|
11 |
+
|
12 |
+
def call_llm_and_parse(
    groq_client,
    prompt: str,
    model: Type[BaseModel],
    max_retries: int = 3,
    delay: float = 1.0
) -> Union[BaseModel, Dict[str, Any]]:
    """
    Call LLM with a prompt, parse the JSON response, and validate it using a Pydantic model.

    Args:
        groq_client: OpenAI-compatible Groq chat client.
        prompt (str): The prompt to send to the LLM.
        model (Type[BaseModel]): The Pydantic model to validate against.
        max_retries (int, optional): Number of retries on failure. Default is 3.
        delay (float, optional): Delay (in seconds) between retries, multiplied by attempt count.

    Returns:
        BaseModel: Validated Pydantic model instance if successful.
        dict: Contains 'error' and 'raw' fields if validation fails after retries.
    """
    # Bug fix: pre-initialize so the error path below never hits a NameError
    # when create() itself fails on the final attempt before response_text
    # (previously referenced via `'json_str' in locals()`) was ever assigned.
    response_text = None
    json_str = None
    for attempt in range(1, max_retries + 1):
        try:
            print(f"[call_llm_and_parse] Attempt {attempt}: sending prompt to LLM...")

            completion = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=800
            )

            response_text = completion.choices[0].message.content
            print(f"[call_llm_and_parse] Raw LLM response: {response_text[:200]}...")  # first 200 chars

            # Extract JSON (handle dirty or partial JSON)
            json_str = extract_and_repair_json(response_text)

            # Parse JSON using dirtyjson (tolerates trailing commas, etc.)
            parsed = dirtyjson.loads(json_str)

            # Validate with Pydantic
            validated = model.model_validate(parsed)

            print("[call_llm_and_parse] Successfully parsed and validated.")
            return validated

        except Exception as e:
            print(f"[Retry {attempt}] Error: {e}")
            if attempt < max_retries:
                # Linear backoff: delay, 2*delay, ...
                time.sleep(delay * attempt)
            else:
                print("[call_llm_and_parse] Failed after retries.")
                return {
                    "error": f"Validation failed after {max_retries} retries: {e}",
                    "raw": json_str if json_str is not None else response_text
                }
|
68 |
+
|
69 |
+
|
70 |
+
def extract_and_repair_json(text: str) -> str:
    """
    Extracts JSON starting from first '{' and balances braces.

    Raises:
        ValueError: when *text* contains no '{' at all.
    """
    start = text.find('{')
    if start == -1:
        raise ValueError("No JSON object found.")
    candidate = text[start:]
    # Append closers for any braces the model left unbalanced.
    missing = candidate.count('{') - candidate.count('}')
    if missing > 0:
        candidate += '}' * missing
    return candidate
|
profile_preprocessing.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict, Any
|
2 |
+
from urllib.parse import urlparse
|
3 |
+
# ========== 3. PROFILE PREPROCESSING HELPERS ==========
def normalize_url(url):
    """Canonicalize a profile URL: trim whitespace and any trailing slashes."""
    return url.strip().rstrip('/')
|
6 |
+
|
7 |
+
def summarize_skills(skills: List[Dict]) -> str:
    """Comma-join the non-empty 'title' values from a list of skill dicts."""
    titles = (entry.get('title', '') for entry in skills)
    return ', '.join(t for t in titles if t)
|
9 |
+
|
10 |
+
def summarize_projects(projects: List[Dict]) -> str:
    """
    Build one line per project of the form '<title>: <description text>'.

    Description text is gathered from every 'textComponent' entry nested
    under the project's 'subComponents'; projects without any yield
    '<title>: ' with an empty body.
    """
    lines = []
    for project in projects:
        texts = [
            entry.get('text', '')
            for comp in (project.get('subComponents') or [])
            for entry in comp.get('description', [])
            if entry.get('type') == 'textComponent'
        ]
        blurb = ' '.join(texts).strip()
        lines.append(f"{project.get('title', '')}: {blurb}")
    return '\n'.join(lines)
|
22 |
+
|
23 |
+
def summarize_educations(educations: List[Dict]) -> str:
    """Comma-join 'title (subtitle, caption)' for each education entry with a title."""
    entries = []
    for edu in educations:
        if not edu.get('title'):
            continue
        entries.append(
            f"{edu.get('title', '')} ({edu.get('subtitle', '')}, {edu.get('caption', '')})"
        )
    return ', '.join(entries)
|
28 |
+
|
29 |
+
def summarize_certs(certs: List[Dict]) -> str:
    """Comma-join 'title (subtitle, caption)' for each certificate that has a title."""
    fmt = "{} ({}, {})".format
    return ', '.join(
        fmt(cert.get('title', ''), cert.get('subtitle', ''), cert.get('caption', ''))
        for cert in certs
        if cert.get('title')
    )
|
34 |
+
|
35 |
+
def summarize_test_scores(scores: List[Dict]) -> str:
    """Comma-join 'title (subtitle)' for each test score entry that has a title."""
    parts = []
    for score in scores:
        title = score.get('title')
        if title:
            parts.append(f"{title} ({score.get('subtitle', '')})")
    return ', '.join(parts)
|
40 |
+
|
41 |
+
def summarize_generic(items: List[Dict], key='title') -> str:
    """Comma-join the *key* field of every item where that field is truthy."""
    values = [item.get(key, '') for item in items]
    return ', '.join(value for value in values if value)
|
43 |
+
|
44 |
+
|
45 |
+
# === Preprocess raw profile into summarized profile ===
|
46 |
+
def preprocess_profile(raw_profile: Dict[str, Any]) -> Dict[str, str]:
    """
    Flatten a raw scraped LinkedIn profile into a dict of summary strings.

    Scalar fields are copied through with "" as the fallback for missing
    keys; list-valued sections are condensed by the summarize_* helpers.
    Key order is fixed and matches the downstream state layout.
    """
    get = raw_profile.get  # every field read tolerates a missing key
    return {
        "FullName": get("fullName", ""),
        "profile_url": get("linkedinUrl", ""),
        "Headline": get("headline", ""),
        "JobTitle": get("jobTitle", ""),
        "CompanyName": get("companyName", ""),
        "CompanyIndustry": get("companyIndustry", ""),
        # Duration may arrive as a non-string; coerce for a uniform value type.
        "CurrentJobDuration": str(get("currentJobDuration", "")),
        "About": get("about", ""),
        "Experiences": summarize_generic(get("experiences", []), key='title'),
        "Skills": summarize_skills(get("skills", [])),
        "Educations": summarize_educations(get("educations", [])),
        "Certifications": summarize_certs(get("licenseAndCertificates", [])),
        "HonorsAndAwards": summarize_generic(get("honorsAndAwards", []), key='title'),
        "Verifications": summarize_generic(get("verifications", []), key='title'),
        "Highlights": summarize_generic(get("highlights", []), key='title'),
        "Projects": summarize_projects(get("projects", [])),
        "Publications": summarize_generic(get("publications", []), key='title'),
        "Patents": summarize_generic(get("patents", []), key='title'),
        "Courses": summarize_generic(get("courses", []), key='title'),
        "TestScores": summarize_test_scores(get("testScores", []))
    }
|
69 |
+
|
70 |
+
# === Create & fill state ===
|
71 |
+
|
72 |
+
|
73 |
+
def initialize_state(raw_profile: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the initial LangGraph chatbot state from a raw scraped profile.

    The returned dict carries:
    - the cleaned profile plus its normalized URL,
    - every profile section split out as a guaranteed-string entry,
    - placeholders that downstream tools populate later,
    - an empty chat history (list of {"role": ..., "content": ...} dicts).
    """
    # Clean / normalize the scraped profile first.
    profile = preprocess_profile(raw_profile)
    print(f"initializing url as {profile['profile_url']}")

    def section(key: str) -> str:
        # Coerce missing or None values to "" so sections are never None.
        return profile.get(key, "") or ""

    sections = {
        "about": section("About"),
        "headline": section("Headline"),
        "skills": section("Skills"),
        "projects": section("Projects"),
        "educations": section("Educations"),
        "certifications": section("Certifications"),
        "honors_and_awards": section("HonorsAndAwards"),
        "experiences": section("Experiences"),
        "publications": section("Publications"),
        "patents": section("Patents"),
        "courses": section("Courses"),
        "test_scores": section("TestScores"),
        "verifications": section("Verifications"),
        "highlights": section("Highlights"),
        "job_title": section("JobTitle"),
        "company_name": section("CompanyName"),
        "company_industry": section("CompanyIndustry"),
        "current_job_duration": section("CurrentJobDuration"),
        "full_name": section("FullName"),
    }

    state: Dict[str, Any] = {
        "profile": profile,
        "profile_url": normalize_url(profile.get("profile_url", "") or ""),
        "sections": sections,
        # === Placeholders populated by tools ===
        "enhanced_content": {},      # filled by the ContentGenerator tool
        "profile_analysis": None,    # Optional
        "job_fit": None,             # Optional
        "target_role": None,         # Optional[str]
        "editing_section": None,     # Optional[str]
        # === Chat history ===
        "messages": [],
        "next_tool_name": None,
    }
    return state
|
127 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
apify-client
|
2 |
+
python-dotenv
|
3 |
+
streamlit
|
4 |
+
langchain
|
5 |
+
openai
|
6 |
+
# python-dotenv  (duplicate pin — already listed above)
|
7 |
+
# langchain_openai  (duplicate — pip normalizes this to langchain-openai, listed below)
|
8 |
+
# Core Python packages
|
9 |
+
# python-dotenv  (duplicate pin — already listed above)
|
10 |
+
# streamlit  (duplicate pin — already listed above)
|
11 |
+
pydantic
|
12 |
+
|
13 |
+
# LangChain ecosystem
|
14 |
+
langchain-core
|
15 |
+
langchain-openai
|
16 |
+
langgraph
|
17 |
+
langgraph-checkpoint
|
18 |
+
|
19 |
+
# For OpenAI-compatible LLMs (Groq, etc.)
|
20 |
+
# openai  (duplicate pin — already listed above)
|
21 |
+
|
22 |
+
# For parsing "dirty" JSON
|
23 |
+
dirtyjson
|
24 |
+
|
25 |
+
typing-extensions
|
26 |
+
|
27 |
+
tqdm
|
28 |
+
|
29 |
+
|
30 |
+
|
31 |
+
|
scraping_profile.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from apify_client import ApifyClient
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
|
6 |
+
# Load environment variables
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
# Get API token
|
10 |
+
api_token = os.getenv("APIFY_API_TOKEN")
|
11 |
+
|
12 |
+
# Initialize client once (global)
|
13 |
+
client = ApifyClient(api_token)
|
14 |
+
|
15 |
+
|
16 |
+
def scrape_linkedin_profile(profile_url: str) -> dict:
    """
    📄 Scrapes a LinkedIn profile using Apify and returns the data as a Python dict.

    Side effect: the first dataset item is also written to
    'scraped_profile.json'. Returns {} on any failure or empty dataset.
    """
    try:
        run = client.actor("dev_fusion/Linkedin-Profile-Scraper").call(
            run_input={"profileUrls": [profile_url]}
        )
        items = list(client.dataset(run["defaultDatasetId"]).iterate_items())

        if not items:
            print("⚠️ No data found in dataset.")
            return {}

        profile_data = items[0]
        with open("scraped_profile.json", "w") as f:
            json.dump(profile_data, f, indent=2)
        return profile_data
    except Exception as e:
        # Best-effort scraper: log and return an empty profile on any error.
        print(f"❌ Error during scraping: {e}")
        return {}
|
36 |
+
|
37 |
+
|
38 |
+
# 🧪 OPTIONAL: test code only runs when this file is executed directly
if __name__ == "__main__":
    demo_url = "https://www.linkedin.com/in/sri-vallabh-tammireddy/"
    scraped = scrape_linkedin_profile(demo_url)
    print(json.dumps(scraped, indent=2))
|