Spaces:

AYS11231
/

BioChat2

Sleeping

File size: 10,271 Bytes

0af0679

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Multi-model Evaluation LinkedIn Summary and FAQ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "import gradio as gr\n",
    "from dotenv import load_dotenv\n",
    "from pypdf import PdfReader\n",
    "from pathlib import Path\n",
    "from IPython.display import Markdown, display\n",
    "from anthropic import Anthropic\n",
    "from openai import OpenAI  # Used here to call Ollama-compatible API and Google Gemini\n",
    "\n",
    "load_dotenv(override=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OpenAI API Key not set\n",
      "Anthropic API Key exists and begins sk-ant-\n",
      "Google API Key exists and begins AI\n",
      "DeepSeek API Key not set (and this is optional)\n",
      "Groq API Key exists and begins gsk_\n"
     ]
    }
   ],
   "source": [
    "# Report which provider keys were loaded, revealing only a short prefix of each to help debugging\n",
    "\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
    "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
    "deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
    "groq_api_key = os.getenv('GROQ_API_KEY')\n",
    "\n",
    "# Each row: provider label, key value, prefix length to reveal, suffix for the \"not set\" message\n",
    "key_report = [\n",
    "    (\"OpenAI\", openai_api_key, 8, \"\"),\n",
    "    (\"Anthropic\", anthropic_api_key, 7, \" (and this is optional)\"),\n",
    "    (\"Google\", google_api_key, 2, \" (and this is optional)\"),\n",
    "    (\"DeepSeek\", deepseek_api_key, 3, \" (and this is optional)\"),\n",
    "    (\"Groq\", groq_api_key, 4, \" (and this is optional)\"),\n",
    "]\n",
    "\n",
    "for provider, key_value, prefix_len, missing_suffix in key_report:\n",
    "    if key_value:\n",
    "        print(f\"{provider} API Key exists and begins {key_value[:prefix_len]}\")\n",
    "    else:\n",
    "        print(f\"{provider} API Key not set{missing_suffix}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "anthropic = Anthropic()\n",
    "\n",
    "# === Load PDF and extract resume text ===\n",
    "\n",
    "# Relative to the notebook's working directory; change it here if the profile export lives elsewhere\n",
    "LINKEDIN_PDF = Path(\"../claude_based_chatbot_tc/me/linkedin.pdf\")\n",
    "\n",
    "reader = PdfReader(LINKEDIN_PDF)\n",
    "page_texts = [page.extract_text() for page in reader.pages]\n",
    "# Pages with no extractable text are skipped. The remaining pages are joined with a newline\n",
    "# so the last word of one page does not fuse with the first word of the next.\n",
    "linkedin = \"\\n\".join(text for text in page_texts if text)\n",
    "\n",
    "# === Create the shared FAQ generation prompt ===\n",
    "faq_prompt = (\n",
    "    \"Please read the following professional background and resume content carefully. \"\n",
    "    \"Based on this information, generate a well-structured FAQ (Frequently Asked Questions) document that reflects the subject’s professional background.\\n\\n\"\n",
    "    \"== RESUME TEXT START ==\\n\"\n",
    "    f\"{linkedin}\\n\"\n",
    "    \"== RESUME TEXT END ==\\n\\n\"\n",
    "\n",
    "    \"**Instructions:**\\n\"\n",
    "    \"- Write at least 15 FAQs.\\n\"\n",
    "    \"- Each entry should be in the format:\\n\"\n",
    "    \"  - Q: [Question here]\\n\"\n",
    "    \"  - A: [Answer here]\\n\"\n",
    "    \"- Focus on real-world questions that recruiters, collaborators, or website visitors would ask.\\n\"\n",
    "    \"- Be concise, accurate, and use only the information in the resume. Do not speculate or invent details.\\n\"\n",
    "    \"- Use a professional tone suitable for publishing on a personal website.\\n\\n\"\n",
    "\n",
    "    \"Output only the FAQ content. Do not include commentary, headers, or formatting outside of the Q/A list.\"\n",
    ")\n",
    "\n",
    "# Every model is sent this same single-turn conversation\n",
    "messages = [{\"role\": \"user\", \"content\": faq_prompt}]\n",
    "\n",
    "# Parallel lists filled by the model-call cells: evaluators[i] names the model that wrote answers[i].\n",
    "# Re-running this cell resets both, so re-run it before repeating the model calls to avoid duplicates.\n",
    "evaluators = []\n",
    "answers = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# === 1. Anthropic (Claude) call ===\n",
    "\n",
    "model_name = \"claude-3-7-sonnet-latest\"\n",
    "\n",
    "claude = Anthropic()\n",
    "# Keep the reply in its own variable so `faq_prompt` keeps holding the prompt text\n",
    "# instead of being overwritten by an API response object.\n",
    "claude_response = claude.messages.create(\n",
    "    model=model_name,\n",
    "    messages=messages,\n",
    "    max_tokens=1000\n",
    ")\n",
    "\n",
    "# The request asks for 15+ FAQs, which can exceed the token budget; make a cut-off visible\n",
    "if claude_response.stop_reason == \"max_tokens\":\n",
    "    print(f\"Warning: {model_name} hit max_tokens, so its FAQ draft is truncated\")\n",
    "\n",
    "faq_answer = claude_response.content[0].text\n",
    "\n",
    "display(Markdown(faq_answer))\n",
    "evaluators.append(model_name)\n",
    "answers.append(faq_answer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# === 2. Google Gemini call (through the OpenAI-compatible endpoint) ===\n",
    "\n",
    "gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
    "model_name = \"gemini-2.5-flash\"\n",
    "\n",
    "# Keep the reply in its own variable so `faq_prompt` keeps holding the prompt text\n",
    "# instead of being overwritten by an API response object.\n",
    "gemini_response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
    "faq_answer = gemini_response.choices[0].message.content\n",
    "\n",
    "display(Markdown(faq_answer))\n",
    "evaluators.append(model_name)\n",
    "answers.append(faq_answer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# === 3. Groq call (Groq's hosted OpenAI-compatible endpoint, not a local Ollama server) ===\n",
    "\n",
    "groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
    "model_name = \"llama-3.3-70b-versatile\"\n",
    "\n",
    "# Keep the reply in its own variable so `faq_prompt` keeps holding the prompt text\n",
    "# instead of being overwritten by an API response object.\n",
    "groq_response = groq.chat.completions.create(model=model_name, messages=messages)\n",
    "faq_answer = groq_response.choices[0].message.content\n",
    "\n",
    "display(Markdown(faq_answer))\n",
    "evaluators.append(model_name)\n",
    "answers.append(faq_answer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show each draft next to the model that wrote it; zip walks both lists in lockstep\n",
    "\n",
    "for model_label, draft in zip(evaluators, answers):\n",
    "    print(f\"Evaluator: {model_label}\\n\\n{draft}\")\n",
    "\n",
    "\n",
    "# Put every draft under a numbered heading; enumerate(start=1) supplies the 1-based number\n",
    "\n",
    "sections = []\n",
    "for number, draft in enumerate(answers, start=1):\n",
    "    sections.append(f\"# Response from evaluator {number}\\n\\n\" + draft + \"\\n\\n\")\n",
    "together = \"\".join(sections)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the synthesis prompt: the original request, every model's draft, then the merge instructions.\n",
    "\n",
    "# `faq_prompt` may have been rebound to an API response object by the model-call cells,\n",
    "# so take the request text from the message list that was actually sent to the models.\n",
    "original_request = messages[0][\"content\"]\n",
    "\n",
    "# `together` holds all drafts under numbered headings. Without it in the prompt the\n",
    "# synthesizer has nothing to merge.\n",
    "# NOTE(review): the wording below expects each draft to contain a 2-line summary, but the\n",
    "# original request asks for FAQ content only - confirm which is intended.\n",
    "formatter = f\"\"\"You are a meticulous AI evaluator tasked with synthesizing multiple assistant-generated career FAQs and summaries into one high-quality file. You have received {len(evaluators)} drafts based on the same resume, each containing a 2-line summary and a set of FAQ questions with answers.\n",
    "\n",
    "---\n",
    "**Original Request:**\n",
    "\"{original_request}\"\n",
    "---\n",
    "\n",
    "**Drafts to Synthesize:**\n",
    "\n",
    "{together}\n",
    "---\n",
    "\n",
    "Your goal is to combine the strongest parts of each submission into a single, polished output. This will be the final `faq.txt` that lives in a public-facing portfolio folder.\n",
    "\n",
    "**Evaluation & Synthesis Instructions:**\n",
    "\n",
    "1. **Prioritize Accuracy:** Only include information clearly supported by the resume. Do not invent or speculate.\n",
    "2. **Best Questions Only:** Select the most relevant and insightful FAQ questions. Discard weak, redundant, or generic ones.\n",
    "3. **Edit for Quality:** Improve the clarity and fluency of answers. Fix grammar, wording, or formatting inconsistencies.\n",
    "4. **Merge Strengths:** If two assistants answer the same question differently, combine the best phrasing and facts from each.\n",
    "5. **Consistency in Voice:** Ensure a single professional tone throughout the summary and FAQ.\n",
    "\n",
    "**Required Output Structure:**\n",
    "\n",
    "1. **2-Line Summary:** Start with the best or synthesized version of the summary, capturing key career strengths.\n",
    "2. **FAQ Entries:** Follow with at least 8–12 strong FAQ entries in this format:\n",
    "\n",
    "Q: [Question]  \n",
    "A: [Answer]\n",
    "\n",
    "---\n",
    "**Examples of Strong FAQ Topics:**\n",
    "- Key technical skills or languages\n",
    "- Past projects or employers\n",
    "- Teamwork or communication style\n",
    "- Remote work or leadership experience\n",
    "- Career goals or current availability\n",
    "\n",
    "This will be saved as a plain text file (`faq.txt`). Ensure the tone is accurate, clean, and helpful. Do not add unnecessary commentary or meta-analysis. The final version should look like it was written by a professional assistant who knows the subject well.\n",
    "\"\"\"\n",
    "\n",
    "formatter_messages = [{\"role\": \"user\", \"content\": formatter}]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# === Final synthesis call (Claude) ===\n",
    "anthropic = Anthropic(api_key=anthropic_api_key)\n",
    "# Keep the reply in its own variable so `faq_prompt` is not overwritten by a response object\n",
    "final_response = anthropic.messages.create(\n",
    "    model=\"claude-3-7-sonnet-latest\",\n",
    "    messages=formatter_messages,\n",
    "    max_tokens=1000,\n",
    ")\n",
    "# A summary plus 8-12 FAQ entries can exceed the token budget; make a cut-off visible\n",
    "if final_response.stop_reason == \"max_tokens\":\n",
    "    print(\"Warning: the synthesized FAQ hit max_tokens and is truncated\")\n",
    "# `results` is the synthesized FAQ text used by the chat interface cell\n",
    "results = final_response.content[0].text\n",
    "display(Markdown(results))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gr.ChatInterface needs a callable as its first argument. The FAQ text itself is a string,\n",
    "# which is not callable, so it is supplied to the model as context instead.\n",
    "\n",
    "chat_client = Anthropic(api_key=anthropic_api_key)\n",
    "chat_system_prompt = (\n",
    "    \"You answer visitors' questions about the professional described in the FAQ below. \"\n",
    "    \"Use only the information in the FAQ. If the FAQ does not cover a question, say so instead of guessing.\\n\\n\"\n",
    "    \"== FAQ START ==\\n\"\n",
    "    f\"{results}\\n\"\n",
    "    \"== FAQ END ==\"\n",
    ")\n",
    "\n",
    "\n",
    "def chat(message, history):\n",
    "    \"\"\"Reply to one chat message using the synthesized FAQ as the only knowledge source.\n",
    "\n",
    "    Args:\n",
    "        message: The visitor's latest message text.\n",
    "        history: Earlier turns as dicts with \"role\" and \"content\" keys, the shape\n",
    "            ChatInterface passes when type=\"messages\".\n",
    "\n",
    "    Returns:\n",
    "        The assistant's reply text.\n",
    "    \"\"\"\n",
    "    # Gradio may attach extra keys to history entries; forward only role/content to the API\n",
    "    past_turns = [{\"role\": turn[\"role\"], \"content\": turn[\"content\"]} for turn in history]\n",
    "    reply = chat_client.messages.create(\n",
    "        model=\"claude-3-7-sonnet-latest\",\n",
    "        system=chat_system_prompt,\n",
    "        messages=past_turns + [{\"role\": \"user\", \"content\": message}],\n",
    "        max_tokens=1000,\n",
    "    )\n",
    "    return reply.content[0].text\n",
    "\n",
    "\n",
    "gr.ChatInterface(chat, type=\"messages\").launch()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}