# Page residue from the hosting site's file viewer (not part of the program):
# uploader "tsrivallabh", commit 5318b09 ("Upload 10 files"), verified,
# file size 26.5 kB.
import os
import json
import re
import time
from typing import Dict, Any, List, Optional, Annotated
from chatbot_model import (
UserMemory,
ChatbotState,
ProfileAnalysisModel,
JobFitModel,
ContentGenerationModel,
)
from llm_utils import call_llm_and_parse
from profile_preprocessing import (
preprocess_profile,
initialize_state,
normalize_url
)
from openai import OpenAI
import streamlit as st
import hashlib
from dotenv import load_dotenv
from pydantic import BaseModel, Field,ValidationError
# import pdb; pdb.set_trace()
from scraping_profile import scrape_linkedin_profile
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage,BaseMessage,ToolMessage
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END,START
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import add_messages # if your framework exposes this
from langgraph.prebuilt import ToolNode,tools_condition,InjectedState
import dirtyjson
import sqlite3
try:
from langgraph.checkpoint.sqlite import SqliteSaver
SQLITE_AVAILABLE = True
except ImportError:
SQLITE_AVAILABLE = False
# ========== 1. ENVIRONMENT & LLM SETUP ==========
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# The Groq key is mandatory: it authenticates the OpenAI-compatible client
# created here and the ChatOpenAI model configured later in this file.
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    # Raise explicitly instead of using `assert`: assertions are removed when
    # Python runs with -O, which would let a missing key slip through and only
    # fail later, on the first API call.
    raise RuntimeError("GROQ_API_KEY not found in environment!")

# Groq serves an OpenAI-compatible API, so the stock OpenAI client is pointed
# at Groq's base URL. The key validated above is reused rather than re-read.
groq_client = OpenAI(
    api_key=groq_key,
    base_url="https://api.groq.com/openai/v1",
)
def normalize_url(url):
    """Return *url* with surrounding whitespace and trailing slashes removed.

    Used to compare profile URLs for equality regardless of a trailing "/".

    NOTE: this definition rebinds the ``normalize_url`` name that is also
    imported from ``profile_preprocessing`` at the top of the file; from this
    point on the module uses this local version.

    Args:
        url: The URL string to normalise. ``None`` or an empty string is
            accepted and yields ``""`` instead of raising ``AttributeError``.

    Returns:
        The normalised URL, or ``""`` when no URL was given.
    """
    if not url:
        return ""
    return url.strip().rstrip("/")
def validate_state(state: dict) -> None:
    """Check *state* against the ChatbotState schema and report problems in the UI.

    A valid state produces no output. On a validation failure the function
    shows an error banner, writes one "- At: <location> / Error: <message>"
    entry per problem, offers the raw exception text in an expander, and then
    calls ``st.stop()``.
    """
    try:
        ChatbotState.model_validate(state)
    except ValidationError as exc:
        st.error("❌ Validation failed!")
        # One entry per validation problem, with the location path joined by arrows.
        report = "\n".join(
            "- At: {}\n Error: {}".format(
                " β†’ ".join(map(str, problem["loc"])),
                problem["msg"],
            )
            for problem in exc.errors()
        )
        st.write(report)
        # Keep the raw pydantic text available for debugging.
        st.expander("See raw validation error").write(str(exc))
        st.stop()
# Module-level memory store; the tool functions in this file call
# `user_memory.save(...)` with their latest results.
user_memory = UserMemory()
# ========== 7. AGENT FUNCTIONS ==========
def profile_analysis_prompt(profile: Dict[str, str]) -> str:
    """Build the LLM prompt that requests a full profile analysis.

    Each known profile field is rendered as a ``Name: value`` line (missing
    fields render with an empty value), followed by the analysis instructions
    and an example of the JSON shape the model must answer with.

    Args:
        profile: Mapping from profile field name (e.g. ``"FullName"``) to text.

    Returns:
        The prompt text with leading and trailing whitespace removed.
    """
    field_names = (
        "FullName",
        "Headline",
        "JobTitle",
        "CompanyName",
        "CompanyIndustry",
        "CurrentJobDuration",
        "About",
        "Experiences",
        "Skills",
        "Educations",
        "Certifications",
        "HonorsAndAwards",
        "Verifications",
        "Highlights",
        "Projects",
        "Publications",
        "Patents",
        "Courses",
        "TestScores",
    )
    profile_lines = "\n".join(
        f"{name}: {profile.get(name, '')}" for name in field_names
    )
    # The template text is kept flush-left because its whitespace is part of the prompt.
    prompt = f"""
You are a top-tier LinkedIn career coach and AI analyst.
Analyze the following candidate profile carefully.
Candidate profile data:
{profile_lines}
Identify and summarize:
1. strengths:
- technical strengths (skills, tools, frameworks)
- project strengths (impactful projects, innovation)
- educational strengths (degrees, certifications, awards)
- soft skills and personality traits (teamwork, leadership)
2. weaknesses:
- missing or weak technical skills
- gaps in projects, experience, or education
- unclear profile sections or missing context
3. actionable suggestions:
- concrete ways to improve profile headline, about section, or add projects
- suggestions to learn or highlight new skills
- ideas to make the profile more attractive for recruiters
Important instructions:
- Respond ONLY with valid JSON.
- Do NOT include text before or after JSON.
- Be concise but detailed.
Example JSON format:
{{
"strengths": {{
"technical": ["...", "..."],
"projects": ["...", "..."],
"education": ["...", "..."],
"soft_skills": ["...", "..."]
}},
"weaknesses": {{
"technical_gaps": ["...", "..."],
"project_or_experience_gaps": ["...", "..."],
"missing_context": ["...", "..."]
}},
"suggestions": [
"...",
"...",
"..."
]
}}
"""
    return prompt.strip()
def job_fit_prompt(sections: Dict[str, str], target_role: str) -> str:
    """Build the LLM prompt that scores a profile against *target_role*.

    Args:
        sections: Mapping from section key (e.g. ``"headline"``, ``"skills"``)
            to its text; missing keys render as empty values.
        target_role: Role name the profile is compared against.

    Returns:
        The prompt text with leading and trailing whitespace removed.
    """
    # (label shown to the model, key looked up in `sections`), in display order.
    labelled_keys = (
        ("Headline", "headline"),
        ("About", "about"),
        ("Job Title", "job_title"),
        ("Company", "company_name"),
        ("Industry", "company_industry"),
        ("Current Job Duration", "current_job_duration"),
        ("Skills", "skills"),
        ("Projects", "projects"),
        ("Educations", "educations"),
        ("Certifications", "certifications"),
        ("Honors & Awards", "honors_and_awards"),
        ("Experiences", "experiences"),
    )
    candidate_lines = "\n".join(
        f"- {label}: {sections.get(key, '')}" for label, key in labelled_keys
    )
    # The template text is kept flush-left because its whitespace is part of the prompt.
    prompt = f"""
You are an expert career coach and recruiter.
Compare the following candidate profile against the typical requirements for the role of "{target_role}".
Candidate Profile:
{candidate_lines}
**Instructions:**
- Respond ONLY with valid JSON.
- Your JSON must exactly match the following schema:
{{
"match_score": 85,
"missing_skills": ["Skill1", "Skill2"],
"suggestions": ["...", "...", "..."]
}}
- "match_score": integer from 0–100 estimating how well the profile fits the target role.
- "missing_skills": key missing or weakly mentioned skills.
- "suggestions": 3 actionable recommendations to improve fit (e.g., learn tools, rewrite headline).
Do NOT include explanations, text outside JSON, or markdown.
Start with '{{' and end with '}}'.
The JSON must be directly parseable.
"""
    return prompt.strip()
# --- Tool: Profile Analyzer ---
@tool
def profile_analyzer(state: Annotated[ChatbotState, InjectedState]) -> dict:
    """
    Tool: Analyze the overall full user's profile to give strengths, weaknesses, suggestions.
    This is needed only if full analysis of profile is needed.
    Returns the full analysis in the form of a json.
    - It takes no arguments
    """
    # NOTE: the docstring above is what `@tool` exposes to the model as the
    # tool description, so its wording is intentionally left unchanged.

    # The injected state may be a plain mapping (the rest of this file uses
    # state.get / state[...]) or an attribute-style object; support both so
    # the profile is actually found and the write below cannot fail.
    state_is_mapping = isinstance(state, dict)
    if state_is_mapping:
        profile = state.get("profile") or {}
    else:
        profile = getattr(state, "profile", None) or {}

    # Ask the LLM for a structured analysis and convert it to a plain dict.
    prompt = profile_analysis_prompt(profile)
    analysis_model = call_llm_and_parse(groq_client, prompt, ProfileAnalysisModel)
    analysis_dict = analysis_model.model_dump()

    # Record the result on the state object handed to the tool and in user memory.
    # NOTE(review): whether a mutation of the injected state is persisted by
    # the graph is not visible from here -- confirm.
    if state_is_mapping:
        state["profile_analysis"] = analysis_dict
    else:
        state.profile_analysis = analysis_dict
    user_memory.save("profile_analysis", analysis_dict)
    print("πŸ’Ύ [DEBUG] Saved analysis to user memory.")
    print("πŸ“¦ [DEBUG] Updated state.profile_analysis with analysis.")
    return analysis_dict
# --- Tool: Job Matcher ---
@tool
def job_matcher(
    state: Annotated[ChatbotState, InjectedState],
    target_role: str = None
) -> dict:
    """
    Tool: Analyze how well the user's profile fits the target role.
    - If user is asking if he is a good fit for a certain role, or needs to see if his profile is compatible with a certain role, call this.
    - Takes target_role as an argument.
    - this tool is needed when match score, missing skills, suggestions are needed based on a job name given.
    """
    # NOTE: the docstring above is what `@tool` exposes to the model as the
    # tool description, and the signature drives the argument schema, so both
    # are intentionally left unchanged.
    print(f"target role is {target_role}")

    # The injected state may be a plain mapping (the rest of this file uses
    # state.get / state[...]) or an attribute-style object; support both.
    state_is_mapping = isinstance(state, dict)
    if state_is_mapping:
        sections = state.get("sections") or {}
    else:
        sections = getattr(state, "sections", None) or {}

    prompt = job_fit_prompt(sections, target_role)

    # Best effort: a failed or unparsable LLM answer yields a zero-score
    # placeholder instead of an exception, but the cause is printed so the
    # failure is no longer silent.
    try:
        job_fit_model = call_llm_and_parse(groq_client, prompt, JobFitModel)
        job_fit_dict = job_fit_model.model_dump()
        job_fit_dict["target_role"] = target_role
    except Exception as exc:
        print(f"[DEBUG] job_matcher could not obtain a parsed job-fit result: {exc!r}")
        job_fit_dict = {
            "target_role": target_role,
            "match_score": 0,
            "missing_skills": [],
            "suggestions": ["Parsing failed or incomplete response."],
        }

    # Record the result on the state object handed to the tool and in user memory.
    if state_is_mapping:
        state["job_fit"] = job_fit_dict
    else:
        state.job_fit = job_fit_dict
    user_memory.save("job_fit", job_fit_dict)
    return job_fit_dict
@tool
def extract_from_state_tool(
    state: Annotated[ChatbotState, InjectedState],
    key: str
) -> dict:
    """
    This tool is used if user wants to ask about any particular part of this profile. Use this if a single section is targeted. It expects key as an argument, that represents what
    the user is wanting to look at, from his profile.
    Argument:
    key: only pass one from the below list, identify one thing the user wants to look into and choose that:
    "sections.about", "sections.headline", "sections.skills", "sections.projects",
    "sections.educations", "sections.certifications", "sections.honors_and_awards",
    "sections.experiences", "sections.publications", "sections.patents",
    "sections.courses", "sections.test_scores", "sections.verifications",
    "sections.highlights", "sections.job_title", "sections.company_name",
    "sections.company_industry", "sections.current_job_duration", "sections.full_name",
    "enhanced_content", "profile_analysis", "job_fit", "target_role", "editing_section"
    """
    # NOTE: the docstring above is what `@tool` exposes to the model as the
    # tool description; only its typos and the unbalanced quote in the key
    # list were corrected.

    # The key is chosen by the model, so anything that is not a dotted string
    # path resolves to "nothing found" instead of raising.
    if not isinstance(key, str):
        return {"result": None}

    value = state
    try:
        for part in key.split('.'):
            if isinstance(value, dict):
                value = value.get(part)
            elif part.startswith('_') or not hasattr(value, part):
                # Do not walk into private/dunder attributes of state objects.
                value = None
            else:
                value = getattr(value, part)
                # A bound method (e.g. a model helper) is never section data.
                if callable(value):
                    value = None
            if value is None:
                break
    except Exception:
        # Deliberate best effort: a failing property or lookup must surface as
        # an empty result to the model, not as a tool crash.
        value = None
    return {"result": value}
# Tools the chat model is allowed to call; the same list is later handed to
# the graph's tool node.
tools = [profile_analyzer, job_matcher, extract_from_state_tool]

# Chat model settings: Groq's OpenAI-compatible endpoint, authenticated with
# the key loaded during environment setup.
_chat_model_settings = {
    "api_key": groq_key,
    "base_url": "https://api.groq.com/openai/v1",
    "model": "llama3-8b-8192",
    "temperature": 0,
}
llm = ChatOpenAI(**_chat_model_settings)

# Model handle that advertises the tools above on every call.
llm_with_tools = llm.bind_tools(tools)
# ========== 8. LANGGRAPH PIPELINE ==========
def chatbot_node(state: ChatbotState) -> ChatbotState:
    """Run one LLM step of the conversation graph.

    Validates the state, sends the system prompt plus a short tail of the
    message history to the tool-enabled model, appends the model's reply to
    ``state["messages"]`` and returns the state.

    Args:
        state: Conversation state; accessed through ``get``/``setdefault``,
            so it is expected to behave like a mapping with a ``"messages"``
            list.

    Returns:
        The same state object with the model response appended.
    """
    validate_state(state)
    history = state.get("messages", [])
    # The prompt text is kept flush-left because its whitespace is part of the prompt.
    system_prompt = """
You are a helpful AI assistant specialized in LinkedIn profile coaching.
You can:
- Answer user questions.
- If user is greeting , greet him back also telling how you can help him.
- You should proactively use specialized tools whenever possible to give richer, data-driven answers.
IMPORTANT RULES:
- You must call at most one tool at a time.
- Never call multiple tools together in the same step.
- If user asks to show any section, use extract_from_state_tool, and after that, show the exact result from it.
- If information about that section is already known, dont call extract_from_state_tool, directly answer the user query.
- call profile_analyzer function only when full profile analysis is needed, otherwise rely on extract_from_state_tool.
- If user asks to enhance any section, check if it is there in history, otherwise call extract_from_state_tool first.
- Prefer to call a tool when answering instead of directly replying, especially if it can add new, useful insights or up-to-date data.
- If a tool has been recently used and new info isn’t needed, you may answer directly.
- Use tools to verify assumptions, enrich answers, or when the user asks about strengths, weaknesses, job fit, or wants improvements.
Always respond helpfully, clearly, and with actionable advice to guide the user in improving their LinkedIn profile.
"""
    # Only the last two messages are sent. If that window would begin with a
    # ToolMessage (possible when the model emitted more than one tool call
    # despite the prompt), extend it backwards so the assistant message that
    # issued those calls is included; a tool result without its originating
    # assistant message is not a valid chat payload.
    window_start = max(len(history) - 2, 0)
    while window_start > 0 and isinstance(history[window_start], ToolMessage):
        window_start -= 1
    prompt_messages = [SystemMessage(content=system_prompt)] + history[window_start:]

    response = llm_with_tools.invoke(prompt_messages)

    # Debug aid: report which tool (if any) the model asked for first.
    if hasattr(response, "tool_calls") and response.tool_calls:
        first_tool = response.tool_calls[0]
        if isinstance(first_tool, dict):
            tool_name = first_tool.get("name")
        else:
            tool_name = getattr(first_tool, "name", None)
        print(f"[DEBBBBUUUUGGG] using tool {tool_name}")
    print("[DEBUG] LLM response:", response)

    state.setdefault("messages", []).append(response)
    return state
# --- Graph definition ---
# Build the conversation graph over the ChatbotState schema.
graph = StateGraph(state_schema=ChatbotState)
# "chatbot" runs the LLM step; "tools" wraps the tool list defined above.
graph.add_node("chatbot", chatbot_node)
graph.add_node("tools", ToolNode(tools))
# Every run starts at the chatbot node.
graph.add_edge(START, "chatbot")
# tools_condition is LangGraph's prebuilt router; presumably it routes to
# "tools" when the latest AI message carries tool calls and ends the run
# otherwise -- confirm against the installed LangGraph version.
graph.add_conditional_edges("chatbot", tools_condition)
# After a tool has run, control returns to the chatbot node.
graph.add_edge("tools","chatbot")
# NOTE(review): looks redundant with the START -> "chatbot" edge added above;
# confirm before removing.
graph.set_entry_point("chatbot")
# --- Streamlit UI ---
st.set_page_config(
    page_title="πŸ’Ό LinkedIn AI Career Assistant",
    page_icon="πŸ€–",
    layout="wide",
)
st.title("πŸ§‘β€πŸ’Ό LinkedIn AI Career Assistant")

# --- Checkpointer and graph initialization ---
# Both objects are cached in the Streamlit session so that script reruns reuse
# them instead of rebuilding them.
if "checkpointer" not in st.session_state:
    if not SQLITE_AVAILABLE:
        # The SQLite saver could not be imported: fall back to in-memory checkpoints.
        st.session_state["checkpointer"] = MemorySaver()
    else:
        conn = sqlite3.connect("checkpoints1.db", check_same_thread=False)
        st.session_state["checkpointer"] = SqliteSaver(conn)
checkpointer = st.session_state["checkpointer"]

if "app_graph" not in st.session_state:
    st.session_state["app_graph"] = graph.compile(checkpointer=checkpointer)
app_graph = st.session_state["app_graph"]
# Find or create thread
def find_thread_id_for_url(checkpointer, url, max_threads=100):
    """Look for a stored conversation thread that belongs to *url*.

    Thread ids ``"0"`` .. ``str(max_threads - 1)`` are probed in order; the
    first checkpoint whose stored ``profile_url`` equals *url* (after
    normalisation) wins.

    Args:
        checkpointer: Object whose ``get(config)`` returns the checkpoint for
            a thread (a mapping with a ``"channel_values"`` entry) or a falsy
            value when none exists.
        url: Profile URL to search for.
        max_threads: Number of thread ids to probe.

    Returns:
        ``(thread_id, stored_state)`` for the first match, otherwise
        ``(None, None)``.
    """
    search_url = normalize_url(url)
    for thread_id in map(str, range(max_threads)):
        config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
        checkpoint = checkpointer.get(config)
        if not checkpoint or "channel_values" not in checkpoint:
            continue
        user_state = checkpoint["channel_values"]
        # A thread that never stored a URL (key missing, None or empty) cannot
        # belong to any profile; skipping it also avoids normalising None.
        stored_url = user_state.get("profile_url")
        if not stored_url:
            continue
        if normalize_url(stored_url) == search_url:
            return thread_id, user_state
    return None, None
def delete_thread_checkpoint(checkpointer, thread_id):
    """Remove the stored checkpoints of *thread_id*, if the checkpointer can.

    Checkpointers that expose no ``delete_thread`` method are left untouched;
    in that case the call is a no-op.
    """
    if not hasattr(checkpointer, "delete_thread"):
        # Nothing to call on this checkpointer type; leave its data as is.
        return
    checkpointer.delete_thread(thread_id)
def get_next_thread_id(checkpointer, max_threads=100):
    """Return the lowest thread id (as a string) that has no checkpoint yet.

    Every id in ``range(max_threads)`` is probed through
    ``checkpointer.get``; ids with a truthy result count as taken.

    Raises:
        RuntimeError: If all ``max_threads`` ids are already in use.
    """
    occupied = {
        slot
        for slot in range(max_threads)
        if checkpointer.get(
            {"configurable": {"thread_id": str(slot), "checkpoint_ns": ""}}
        )
    }
    free_slot = next(
        (slot for slot in range(max_threads) if slot not in occupied),
        None,
    )
    if free_slot is None:
        raise RuntimeError("No available thread_id")
    return str(free_slot)
# --- Session selection and state initialization ---
# Runs only until a chat mode has been chosen for this browser session: asks
# for a profile URL, then either resumes a stored thread or starts a new one.
if "chat_mode" not in st.session_state:
    profile_url = st.text_input("Profile URL (e.g., https://www.linkedin.com/in/username/)")
    if not profile_url:
        st.info("Please enter a valid LinkedIn profile URL above to start.")
        st.stop()
    # Accept only https://www.linkedin.com/in/<name> with an optional trailing slash.
    valid_pattern = r"^https://www\.linkedin\.com/in/[^/]+/?$"
    if not re.match(valid_pattern, profile_url.strip()):
        st.error("❌ Invalid LinkedIn profile URL. Make sure it matches the format.")
        st.stop()
    url = profile_url.strip()
    # Look for a checkpointed thread that was created for this URL.
    existing_thread_id, previous_state = find_thread_id_for_url(checkpointer, url)
    # Defensive: ensure required fields
    required_fields = ["profile", "sections"]
    if previous_state and not all(f in previous_state and previous_state[f] for f in required_fields):
        # An incomplete stored state is treated as if no previous session existed.
        st.warning("Previous session is missing required data. Please start a new chat.")
        previous_state = None
    if previous_state:
        st.info("A previous session found. Choose:")
        col1, col2 = st.columns(2)
        if col1.button("Continue previous chat"):
            # Resume: reuse the stored thread id and its state as-is.
            st.session_state["chat_mode"] = "continue"
            st.session_state["thread_id"] = existing_thread_id
            st.session_state.state = previous_state
            st.rerun()
        elif col2.button("Start new chat"):
            # Restart: drop the old checkpoints, re-scrape, and reuse the same thread id.
            delete_thread_checkpoint(checkpointer, existing_thread_id)
            with st.spinner("Fetching and processing profile... ⏳"):
                raw=scrape_linkedin_profile(url)
            thread_id = existing_thread_id
            st.session_state["chat_mode"] = "new"
            st.session_state["thread_id"] = thread_id
            st.session_state.state = initialize_state(raw)
            st.session_state.state["profile_url"] = normalize_url(url)
            st.session_state.state["messages"] = []
            st.rerun()
        # Neither button pressed yet: halt this run so the chat UI below is not drawn.
        st.stop()
    else:
        # No usable previous session: scrape the profile and allocate a fresh thread id.
        with st.spinner("Fetching and processing profile... ⏳"):
            raw=scrape_linkedin_profile(url)
        thread_id = get_next_thread_id(checkpointer)
        st.session_state["thread_id"] = thread_id
        st.session_state["chat_mode"] = "new"
        st.session_state.state = initialize_state(raw)
        st.session_state.state["profile_url"] = normalize_url(url)
        st.session_state.state["messages"] = []
        st.rerun()
# --- Main chat UI (only after chat_mode is set) ---
# html.escape is imported next to its only use so this block stays
# self-contained. Every dynamic value below (user text, model text, tool
# output) is escaped before being embedded in markup that Streamlit renders
# with unsafe_allow_html=True; otherwise that text could inject HTML.
from html import escape as _html_escape

state = st.session_state.state
thread_id = st.session_state.get("thread_id")
st.subheader("πŸ’¬ Chat with your AI Assistant")
messages = state.get("messages", [])
chat_container = st.container()


def _safe(value) -> str:
    """Return *value* as text with HTML special characters escaped."""
    return _html_escape(str(value))


def _safe_csv(group, key) -> str:
    """Join the escaped items of ``group[key]`` with ", "; "None" when empty."""
    return ", ".join(_safe(item) for item in (group.get(key) or ["None"]))


with chat_container:
    # Static stylesheet for the chat bubbles (no dynamic content).
    st.markdown(
        """
<style>
.chat-row { display: flex; width: 100%; margin-bottom: 12px; animation: fadeIn 0.5s; }
.chat-row.user { justify-content: flex-end; }
.chat-row.ai { justify-content: flex-start; }
.chat-bubble { font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif; font-size: 1.08rem; line-height: 1.65; padding: 14px 22px; border-radius: 20px; min-width: 60px; max-width: 75vw; box-shadow: 0 2px 12px rgba(0,0,0,0.10); word-break: break-word; display: inline-block; position: relative; margin-bottom: 2px; }
.bubble-user { background: linear-gradient(90deg, #43e97b 0%, #38f9d7 100%); color: #fff; border-bottom-right-radius: 6px; border-top-right-radius: 22px; text-align: right; box-shadow: 0 4px 16px rgba(67,233,123,0.13); }
.bubble-ai { background: linear-gradient(90deg, #e3f0ff 0%, #c9eaff 100%); color: #1a237e; border-bottom-left-radius: 6px; border-top-left-radius: 22px; text-align: left; border: 1.5px solid #b3e0fc; box-shadow: 0 4px 16px rgba(44, 62, 80, 0.08); }
.bubble-unknown { background: #fffbe6; color: #8a6d3b; border-radius: 14px; text-align: center; border: 1px solid #ffe082; display: inline-block; }
.sender-label { font-size: 0.93em; font-weight: 600; opacity: 0.7; margin-bottom: 4px; display: block; }
.avatar { width: 38px; height: 38px; border-radius: 50%; margin-right: 10px; margin-top: 2px; background: #e0e0e0; object-fit: cover; box-shadow: 0 2px 6px rgba(0,0,0,0.07); }
@keyframes fadeIn { from { opacity: 0; transform: translateY(12px);} to { opacity: 1; transform: translateY(0);} }
</style>
""",
        unsafe_allow_html=True,
    )
    job_fit = state.get("job_fit")
    for msg in messages:
        if isinstance(msg, HumanMessage):
            st.markdown(
                f"""
<div class="chat-row user">
<div class="chat-bubble bubble-user">
<span class="sender-label">πŸ§‘β€πŸ’» You</span>
{_safe(msg.content)}
</div>
</div>
""",
                unsafe_allow_html=True,
            )
        elif isinstance(msg, AIMessage):
            # AI messages without text (e.g. pure tool-call requests) are not shown.
            if not msg.content or not msg.content.strip():
                continue
            st.markdown(
                f"""
<div class="chat-row ai">
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="AI"/>
<div class="chat-bubble bubble-ai">
<span class="sender-label">πŸ€– AI</span>
{_safe(msg.content)}
</div>
</div>
""",
                unsafe_allow_html=True,
            )
        elif isinstance(msg, ToolMessage):
            raw_content = msg.content or "(no content)"
            # Tool output is expected to be a JSON object; anything that does
            # not parse is left unrendered, as before.
            try:
                parsed = json.loads(raw_content)
            except (TypeError, ValueError):
                parsed = None
            if parsed and isinstance(parsed, dict):
                # --- Profile analysis format ---
                if all(k in parsed for k in ("strengths", "weaknesses", "suggestions")):
                    strengths = parsed["strengths"]
                    weaknesses = parsed["weaknesses"]
                    suggestions = parsed["suggestions"]
                    formatted = (
                        "### πŸ’ͺ **Strengths**\n"
                        f"- **Technical:** {_safe_csv(strengths, 'technical')}\n"
                        f"- **Projects:** {_safe_csv(strengths, 'projects')}\n"
                        f"- **Education:** {_safe_csv(strengths, 'education')}\n"
                        f"- **Soft Skills:** {_safe_csv(strengths, 'soft_skills')}\n\n"
                        "### ⚠️ **Weaknesses**\n"
                        f"- **Technical Gaps:** {_safe_csv(weaknesses, 'technical_gaps')}\n"
                        f"- **Project/Experience Gaps:** {_safe_csv(weaknesses, 'project_or_experience_gaps')}\n"
                        f"- **Missing Context:** {_safe_csv(weaknesses, 'missing_context')}\n\n"
                        "### πŸ›  **Suggestions to improve**\n"
                        + "\n".join(f"- {_safe(s)}" for s in suggestions)
                    )
                    st.markdown(f"""
<div class="chat-row ai">
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
<div class="chat-bubble bubble-ai">
<span class="sender-label">πŸ“Š Profile Analysis</span>
{formatted}
</div>
</div>
""", unsafe_allow_html=True)
                # --- Job fit format ---
                elif "match_score" in parsed:
                    percent = parsed["match_score"]
                    suggestions = parsed.get("suggestions", [])
                    missing = parsed.get("missing_skills", [])
                    target_role = parsed.get('target_role', 'unspecified')
                    # Remember the most recently rendered role on the session state.
                    state["target_role"]=target_role
                    # A non-numeric score must not break rendering; draw an empty gauge.
                    try:
                        gauge_degrees = float(percent) * 3.6
                    except (TypeError, ValueError):
                        gauge_degrees = 0.0
                    suggestions_html = "<br>".join(f"β€’ {_safe(s)}" for s in suggestions)
                    missing_html = "<br>".join(f"β€’ {_safe(s)}" for s in missing)
                    st.markdown(f"""
<div class="chat-row ai">
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
<div class="chat-bubble bubble-ai">
<span class="sender-label">πŸ“Š Job Fit</span>
<b>🎯 Target Role:</b> {_safe(target_role)}<br>
<div style="
width: 120px; height: 120px; border-radius: 50%;
background: conic-gradient(#25D366 {gauge_degrees}deg, #e0e0e0 0deg);
display: flex; align-items: center; justify-content: center;
font-size: 1.8rem; color: #333; margin: 10px auto;">
{_safe(percent)}%
</div>
<b>Missing Skills:</b><br>{missing_html}<br><br>
<b>Suggestions:</b><br>{suggestions_html}
</div>
</div>
""", unsafe_allow_html=True)
                # --- Section text format ---
                elif "result" in parsed:
                    text = parsed["result"]
                    st.markdown(f"""
<div class="chat-row ai">
<img class="avatar" src="https://img.icons8.com/ios-filled/50/1a237e/robot-2.png" alt="Tool"/>
<div class="chat-bubble bubble-ai">
<span class="sender-label">πŸ“„ Section Content</span>
{_safe(text)}
</div>
</div>
""", unsafe_allow_html=True)
        else:
            # Any other message type is shown in a neutral "unknown" bubble.
            st.markdown(
                f"""
<div class="chat-row">
<div class="chat-bubble bubble-unknown">
<span class="sender-label">⚠️ Unknown</span>
{_safe(getattr(msg, 'content', str(msg)))}
</div>
</div>
""",
                unsafe_allow_html=True,
            )
    st.markdown('<div style="clear:both"></div>', unsafe_allow_html=True)
st.markdown("---")
user_input = st.chat_input(
    placeholder="Ask about your LinkedIn profile, e.g., 'Analyze my profile, how do I fit for AI role, how is my about section?'"
)

# Treat a missing or whitespace-only submission as "nothing typed".
typed_text = user_input.strip() if user_input else ""
if typed_text:
    # Record the user's turn, then check the state before handing it to the graph.
    state.setdefault("messages", []).append(HumanMessage(content=typed_text))
    validate_state(state)
    thread_id = st.session_state.get("thread_id")
    config = {"configurable": {"thread_id": thread_id}}
    with st.spinner("Processing your request..."):
        st.session_state.state = app_graph.invoke(state, config)
    # Redraw the page so the new messages appear in the history above.
    st.rerun()