Spaces:

BillyZ1129
/

Billy_Space

Sleeping

App Files Files Community

BillyZ1129 committed on Apr 17

Commit

9f48a22

verified ·

1 Parent(s): 5274cf6

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -417

app.py CHANGED Viewed

@@ -1,424 +1,30 @@
-from flask import Flask, render_template, request, jsonify
-from werkzeug.utils import secure_filename
-from openai import OpenAI
-from io import BytesIO
-import PyPDF2
-from pdfminer.high_level import extract_text
-from docx import Document
-import os
-import re
-import uuid
-from typing import Tuple
-import pdfplumber
-app = Flask(__name__)
-app.config['UPLOAD_FOLDER'] = '/home/billy1129/resume_optimizer/static/uploads'
-os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB限制
-# 初始化Azure OpenAI客户端
-client = OpenAI(
-    base_url="https://api.deepseek.com",
-    api_key="sk-bc73223a36d240758af12bf4a197a3be"
-)
-def safe_filename(filename: str) -> str:
-    """安全处理文件名，保留中文字符"""
-    filename = re.sub(r'[^\w\u4e00-\u9fff\-\.]', '', filename.strip())
-    name, ext = os.path.splitext(filename)
-    random_str = uuid.uuid4().hex[:6]
-    return f"{name}_{random_str}{ext}"
-def extract_text_from_pdf(file_stream: BytesIO) -> str:
-    """混合提取方案，增强去重处理（包括标点符号和括号）"""
-    def process_duplicates(text: str) -> str:
-        """处理各种重复字符（中文、英文、标点、括号等）"""
-        # 处理中文重复（包括全角标点）
-        text = re.sub(r'([\u4e00-\u9fff])\1+', r'\1', text)
-        # 处理常见标点符号重复（包括全角和半角）
-        text = re.sub(r'([，。、；："“”‘’\'\"\(\)\[\]\{\}\<\>])\1+', r'\1', text)
-        # 处理特殊重复模式（如"（（"变成"（"）
-        text = re.sub(r'(（)\1+', r'\1', text)
-        text = re.sub(r'(）)\1+', r'\1', text)
-        return text
-    try:
-        # 优先尝试pdfplumber
-        try:
-            file_stream.seek(0)
-            with pdfplumber.open(file_stream) as pdf:
-                text = "\n".join([
-                    page.extract_text(x_tolerance=2, y_tolerance=2)
-                    for page in pdf.pages
-                    if page.extract_text()
-                ])
-            print("========= pdfplumber 原始提取内容 =========")
-            print(text)
-            text = process_duplicates(text)
-            print("========= 去重处理后内容 =========")
-            print(text)
-            return text.strip()
-        except Exception as e:
-            print(f"pdfplumber提取失败，尝试PyPDF2: {str(e)}")
-            # 备用方案：PyPDF2+去重
-            file_stream.seek(0)
-            reader = PyPDF2.PdfReader(file_stream)
-            text = '\n'.join({
-                line.strip()
-                for page in reader.pages
-                for line in (page.extract_text() or "").split('\n')
-                if line.strip()
-            })
-            print("========= PyPDF2 原始提取内容 =========")
-            print(text)
-            text = process_duplicates(text)
-            print("========= 去重处理后内容 =========")
-            print(text)
-            return text
-    except Exception as e:
-        raise ValueError(f"PDF解析失败: {str(e)}")
-def extract_text_from_word(file_stream: BytesIO) -> str:
-    """从Word文档提取文本"""
-    try:
-        file_stream.seek(0)
-        doc = Document(file_stream)
-        return "\n".join([para.text for para in doc.paragraphs if para.text])
-    except Exception as e:
-        raise ValueError(f"Word解析失败: {str(e)}")
-def extract_text_from_file(file_stream: BytesIO, filename: str) -> str:
-    """从上传文件提取文本内容"""
-    if filename.lower().endswith('.pdf'):
-        return extract_text_from_pdf(file_stream)
-    elif filename.lower().endswith(('.doc', '.docx')):
-        return extract_text_from_word(file_stream)
-    elif filename.lower().endswith('.txt'):
-        file_stream.seek(0)
-        return file_stream.read().decode('utf-8', errors='ignore')
-    else:
-        raise ValueError("不支持的文件格式")
-def analyze_resume_with_ai(text: str, job_position: str = None) -> Tuple[list, int]:
-    """使用OpenAI分析简历文本并评分"""
-    MAX_TOKENS = 120000
-    if len(text) > MAX_TOKENS * 3.5:
-        return ["简历内容过长，请简化内容"], 0
-    # 根据岗位生成针对性提示
-    job_specific_prompt = ""
-    if job_position:
-        job_specific_prompt = f"""
-[岗位针对性分析]
-目标岗位: {job_position}
-请特别关注以下与目标岗位相关的评估维度:
-1. 专业技能匹配度: 检查简历中是否包含该岗位的核心技能关键词
-2. 项目经验相关性: 评估项目经验与目标岗位的匹配程度
-3. 行业术语使用: 检查是否使用了该岗位领域的专业术语
-4. 成就量化标准: 根据该岗位特点评估成就描述的量化程度
-"""
-    prompt = f"""请严格按照以下四部分分析简历,严格遵循格式:
-{job_specific_prompt if job_specific_prompt else ""}
-[总扣分]
-总扣分: XX分  # 必须单独一行明确写出总扣分值
-[扣分项]
-请列出简历的所有扣分项，每一项必须明确指出扣分项在简历中的位置，扣分数量，并给出具体改进建议，将扣分项和建议放在【缺点】中输出给用户。
-请严格遵循以下评分标准中的扣分规则，最后在第一行，计算总扣分量，格式为"总扣分: XX分"。
-[整体总结]
-用一段话来整体概括这篇简历的优缺点，特别关注与目标岗位的匹配度。
-[优点]
-• 优点1 (特别标注与目标岗位相关的优势)
-• 优点2
-[缺点]
-• 具体位置(简历第几行或哪个部分): 具体问题 (具体改进建议) (-X分)
-• 具体位置(简历第几行或哪个部分): 具体问题 (具体改进建议) (-X分)
-确保在[缺点]部分之后不输出任何其他内容
-评分标准：
-高质量简历评分标准（基于STAR法则和岗位匹配度）
-一、基础信息完整性（满分15分）
-必备信息要求：
-姓名
-联系方式（电话、邮箱）
-住址信息（至少提供省份或城市）
-评分细则：
-每项必备信息均完整且正确：得满分15分。
-缺失任一项：扣5分；如缺失两项及以上，累计扣分，但最低分为0分。
-二、内容结构与逻辑性（满分25分）
-结构要求：
-简历需清晰划分区域，如个人信息、教育经历、工作经历、技能、项目经验等。
-每一区块内容需符合逻辑，信息层次分明。
-评分细则：
-每个必备区域（至少5个区域）均明确标识并合理排序：每个区域得5分，区域缺失或模糊者扣5分。
-在每个区域内，要求描述具备逻辑性和条理性，出现明显逻辑混乱（如叙述前后矛盾或顺序混乱）者，每处扣2分，累计扣分不超过该区域分值。
-三、专业技能及关键词匹配（满分30分）
-匹配要求：
-简历中必须明确列出与目标职位直接相关的核心技能或关键词（建议不少于3项，最多5项计分）。
-评分细则：
-每列出一项与目标职位高度匹配的技能或关键词，得6分（最多计5项得分）。
-如未列出任何相关技能或关键词，直接扣30分。
-若关键词存在但与目标职位匹配度较低或描述不清晰，依据实际情况酌情扣分（每项扣分范围为2-4分）。
-【新增】岗位相关关键词匹配度额外评分：
-• 完全匹配目标岗位核心技能：每项+2分（最高+10分）
-• 部分匹配目标岗位次要技能：每项+1分（最高+5分）
-四、工作成就与项目描述（满分20分，必须遵循STAR法则）
-要求说明：
-每段工作经历或项目描述必须完整包含：
-Situation（情境）： 说明工作/项目背景与挑战。
-Task（任务）： 说明你在该情境下需要完成的任务。
-Action（行动）： 描述为解决问题所采取的具体措施。
-Result（结果）： 列出取得的成果和影响（最好附量化指标）。
-评分细则：
-每完整描述一项工作或项目经历且具备STAR所有要素：得5分，最多计4项得分。
-若工作或项目描述存在缺失或不清晰（例如缺少关键STAR元素），则每项扣2-5分（依据缺失程度和信息模糊程度）。
-如果简历完全没有相关描述，直接扣20分。
-【新增】岗位相关项目经验额外评分：
-• 高度相关项目：每项+3分（最高+9分）
-• 部分相关项目：每项+1分（最高+3分）
-五、语言表达及排版质量（满分10分）
-表达与排版要求：
-整体语言表达准确、专业，无明显错别字或语法错误。
-排版整洁、格式统一，避免混乱或信息堆砌。
-评分细则：
-排版格式符合要求，得5分；若出现明显格式错误或杂乱，每项错误扣1至5分，累计扣分最高5分。
-语言表达无错别字或语法错误，得5分；每出现一处错别字或语法错误扣1分（最多扣5分）。
-简历内容：
-{text[:30000]}{'...' if len(text) > 30000 else ''}"""
-    try:
-        response = client.chat.completions.create(
-            model="deepseek-chat",
-            messages=[
-                {"role": "system", "content": "你是一位严格的简历评估专家。你必须严格按照评分标准进行评分和扣分，并明确指出每个缺点在简历中的具体位置。总分为100分，最终分数 = 100 - 总扣分。"},
-                {"role": "user", "content": prompt}
-            ],
-            temperature=1,
-            stream=False
-        )
-        content = response.choices[0].message.content
-        # 输出原始 AI 响应以便调试
-        print("======== RAW AI RESPONSE ========")
-        print(content)
-        print("=================================")
-        # 解析响应内容
-        lines = [line.strip() for line in content.split('\n') if line.strip()]
-        suggestions = []
-        deduction_points = 0
-        current_section = None
-        # 首先查找显式的总扣分声明
-        total_deduction_match = None
-        for line in lines:
-            total_deduction_match = re.search(r'总扣分[:：]\s*(\d+)分', line)
-            if total_deduction_match:
-                deduction_points = int(total_deduction_match.group(1))
-                break
-        # 如果没有找到显式总扣分，则尝试从缺点部分累加
-        if total_deduction_match is None:
-            in_cons_section = False
-            for line in lines:
-                if re.match(r'^\[?缺点\]?', line, re.IGNORECASE):
-                    in_cons_section = True
-                    continue
-                if in_cons_section:
-                    deduction_match = re.search(r'\(-(\d+)分\)', line)
-                    if deduction_match:
-                        deduction_points += int(deduction_match.group(1))
-        # 确保扣分值在合理范围内
-        deduction_points = max(0, min(100, deduction_points))
-        # 计算最终分数
-        score = max(0, min(100, 100 - deduction_points))
-        current_section = None
-        # 更严格的章节检测
-        for line in lines:
-            # 检测章节标题
-            if re.match(r'^\[?整体总结\]?', line, re.IGNORECASE):
-                current_section = "summary"
-                suggestions.append(line)
-                continue
-            elif re.match(r'^\[?优点\]?', line, re.IGNORECASE):
-                current_section = "pros"
-                suggestions.append(line)
-                continue
-            elif re.match(r'^\[?缺点\]?', line, re.IGNORECASE):
-                current_section = "cons"
-                suggestions.append(line)
-                continue
-            elif re.match(r'^\[?扣分项\]?', line, re.IGNORECASE):
-                current_section = "deduction"
-                continue
-            # 只保留当前章节的内容
-            if current_section in ["summary", "pros", "cons"]:
-                suggestions.append(line)
-        return suggestions, score
-    except Exception as e:
-        print(f"AI分析错误: {str(e)}")
-        return [f"AI分析时发生错误: {str(e)}"], 0
-@app.route('/')
-def index():
-    return render_template('index.html')
-@app.route('/upload', methods=['POST'])
-def upload_file():
-    if 'resume' not in request.files:
-        return jsonify({'error': '请选择文件上传'}), 400
-    file = request.files['resume']
-    if file.filename == '':
-        return jsonify({'error': '未选择文件'}), 400
-    try:
-        # 从form获取job_position
-        job_position = request.form.get('job_position')
-        if not job_position:
-            return jsonify({'error': '请选择目标岗位'}), 400
-        filename = safe_filename(file.filename)
-        file_stream = BytesIO(file.read())
-        text = extract_text_from_file(file_stream, file.filename)
-        if not text.strip():
-            return jsonify({'error': '文件内容为空或无法解析'}), 400
-        suggestions, score = analyze_resume_with_ai(text, job_position)
-        return jsonify({
-            'message': '分析成功',
-            'suggestions': suggestions,
-            'score': score,
-            'filename': filename,
-            'job_position': job_position
-        })
-    except ValueError as e:
-        return jsonify({'error': str(e)}), 400
-    except Exception as e:
-        print(f"上传处理错误: {str(e)}")
-        return jsonify({'error': f'处理失败: {str(e)}'}), 500
-@app.route('/generate_cover_letter', methods=['POST'])
-def generate_cover_letter():
-    try:
-        data = request.json
-        required_fields = ['company_name', 'position', 'resume_text', 'job_description']
-        # 验证必填字段
-        for field in required_fields:
-            if not data.get(field):
-                return jsonify({'error': f'缺少必填字段: {field}'}), 400
-        # 构建AI提示词 - 优化版
-        prompt = f"""你是一位专业的职业顾问，需要根据申请人提供的简历内容撰写求职信。请严格遵守以下规则：
-1. 信息真实性原则：
-- 只能使用简��中明确列出的教育背景、工作经历、项目经验和技能
-- 绝对禁止添加、编造或推断简历中没有的信息
-- 如果某项要求(如特定技能)在简历中未体现，不要在求职信中提及
-2. 内容要求：
-[必须包含的格式要素]
-- 正式商务信函格式(日期、称呼、正文、结尾敬语)
-- 称呼使用"尊敬的招聘经理"（如不知道具体姓名）
-- 结尾要有明确的行动号召(如期待面试机会)
-[内容结构]
-第一段：明确申请职位和动机(30-50字)
-第二段：从简历中提取与职位最相关的2-3个核心优势(80-120字)
-第三段：结合公司文化和职位要求的具体匹配点(80-120字)
-第四段：礼貌结尾和行动号召(30-50字)
-3. 写作规范：
-- 语言简洁专业，总字数严格控制在300-400字
-- 使用主动语态和积极措辞
-- 量化成果时只能使用简历中提供的数据
-- 避免使用夸张或主观的描述词
-4. 特别注意：
-- 如果简历中没有公司要求的关键技能或经验，不要在信中编造
-- 不要假设任何简历中没有的工作职责或成就
-- 不要添加简历中未列出的证书、奖项或培训经历
-[申请人简历内容]
-{data['resume_text'][:10000]}
-[目标公司信息]
-公司名称: {data['company_name']}
-公司介绍: {data.get('company_info', '未提供')}
-[申请职位]
-{data['position']}
-[职位描述及要求]
-{data['job_description']}
-[申请动机]
-{data.get('motivation', '未提供')}
-请现在开始撰写求职信，严格遵循以上所有要求。"""
-        # 调用AI生成推荐信
-        response = client.chat.completions.create(
-            model="deepseek-chat",
-            messages=[
-                {
-                    "role": "system",
-                    "content": """你是一位严谨的职业顾问，专门帮助求职者撰写基于事实的求职信。
-                    你必须：
-                    1. 只使用申请人简历中明确提供的信息
-                    2. 绝不添加、推断或编造任何简历中没有的内容
-                    3. 如果简历缺少职位要求的关键资质，如实呈现而不虚构
-                    4. 所有成就描述必须有简历中的具体数据支持"""
-                },
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.5,  # 降低创造性，提高准确性
-            max_tokens=2000
-        )
-        content = response.choices[0].message.content
-        # 后处理检查
-        if "简历中未提及" in content or "根据我的了解" in content:
-            raise ValueError("AI尝试添加简历外信息")
-        return jsonify({
-            'success': True,
-            'cover_letter': content,
-            'word_count': len(content.split())
-        })
-    except Exception as e:
-        print(f"推荐信生成错误: {str(e)}")
-        return jsonify({'error': f'生成失败: {str(e)}'}), 500
-if __name__ == '__main__':
-    app.run(debug=True)

+import streamlit as st
+from transformers import pipeline
+# Load the text classification model pipeline
+classifier = pipeline("text-classification",model='isom5240ust/bert-base-uncased-emotion', return_all_scores=True)
+# Streamlit application title
+st.title("Text Classification for you")
+st.write("Classification for 6 emotions: sadness, joy, love, anger, fear, surprise")
+# Text input for user to enter the text to classify
+text = st.text_area("Enter the text to classify", "")
+# Perform text classification when the user clicks the "Classify" button
+if st.button("Classify"):
+    # Perform text classification on the input text
+    results = classifier(text)[0]
+    # Display the classification result
+    max_score = float('-inf')
+    max_label = ''
+    for result in results:
+        if result['score'] > max_score:
+            max_score = result['score']
+            max_label = result['label']
+    st.write("Text:", text)
+    st.write("Label:", max_label)
+    st.write("Score:", max_score)