"""Streamlit app that predicts a job category from resume text.

The user uploads a ``.txt`` file or pastes text; the text is cleaned,
tokenized, padded and passed to a Keras model, and the label encoder turns
the winning class index back into a category name.
"""

import pickle
import re

import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Streamlit re-executes this script on every interaction, so only hit the
# network when the stop-word corpus is actually missing locally.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))

# Sequence length passed to pad_sequences.
# NOTE(review): presumably this must equal the length used at training time.
MAX_SEQUENCE_LENGTH = 500

_PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

# (pattern, replacement) pairs applied in order by cleanResume.
_CLEANING_STEPS = (
    (re.compile(r'http\S+\s*'), ' '),   # URLs
    # NOTE(review): this also removes "cc" inside ordinary words (e.g.
    # "account" -> "a ount"). Left unchanged because the trained model
    # presumably saw text cleaned the same way - confirm before fixing.
    (re.compile(r'RT|cc'), ' '),
    (re.compile(r'#\S+'), ''),          # hashtags
    (re.compile(r'@\S+'), ' '),         # mentions
    (re.compile(r'[%s]' % re.escape(_PUNCTUATION)), ' '),
    (re.compile(r'[^\x00-\x7f]'), ' '),  # non-ASCII characters
    (re.compile(r'\s+'), ' '),          # collapse whitespace runs
)


def cleanResume(resumeText: str) -> str:
    """Return *resumeText* with noise and English stop words removed.

    Strips URLs, "RT"/"cc" markers, hashtags, mentions, punctuation and
    non-ASCII characters, collapses whitespace, then drops every word whose
    lowercase form is in ``stop_words``. Remaining words keep their original
    case and are joined by single spaces.
    """
    for pattern, replacement in _CLEANING_STEPS:
        resumeText = pattern.sub(replacement, resumeText)
    return ' '.join(
        word for word in resumeText.split() if word.lower() not in stop_words
    )


@st.cache_resource
def load_resources():
    """Load the tokenizer, label encoder and model from the working directory.

    Cached with ``st.cache_resource`` so the pickles and the ``.h5`` model
    are read once and reused across reruns instead of being reloaded on
    every button click.

    Returns:
        A ``(tokenizer, label_encoder, model)`` tuple.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # these artifacts must only ever come from a trusted source.
    with open('tokenizer.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    with open('label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)
    model = load_model('deeprank_model_v2.h5')
    return tokenizer, label_encoder, model


def infer(text, tokenizer, label_encoder, model, maxlen=MAX_SEQUENCE_LENGTH):
    """Predict the category label for a single resume.

    Args:
        text: Raw resume text; it is cleaned with ``cleanResume`` first.
        tokenizer: Object providing ``texts_to_sequences``.
        label_encoder: Object providing ``inverse_transform``.
        model: Object providing ``predict``.
        maxlen: Length every token sequence is padded or truncated to.

    Returns:
        The label that ``label_encoder`` maps the highest-scoring class
        index to.
    """
    cleaned_text = cleanResume(text)
    sequence = tokenizer.texts_to_sequences([cleaned_text])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    prediction = model.predict(padded_sequence)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_class[0]


st.title("Resume Category Predictor")
st.write("Upload a resume text file or enter text below to predict the job category.")

uploaded_file = st.file_uploader("Upload Resume (TXT file)", type=["txt"])
user_input = st.text_area("Or paste resume text here:")

# An uploaded file takes precedence over pasted text.
if uploaded_file is not None:
    # getvalue() does not depend on the stream position, and
    # errors="replace" keeps a non-UTF-8 upload from crashing the app; the
    # replacement characters are non-ASCII, so cleanResume strips them.
    resume_text = uploaded_file.getvalue().decode("utf-8", errors="replace")
    st.session_state["resume_text"] = resume_text
elif user_input:
    resume_text = user_input
    st.session_state["resume_text"] = resume_text
else:
    resume_text = ""

if st.button("Predict Category"):
    if resume_text.strip():
        tokenizer, label_encoder, model = load_resources()
        prediction = infer(resume_text, tokenizer, label_encoder, model)
        st.write(f"Predicted Category: **{prediction}**")
    else:
        # Tell the user why nothing happened instead of failing silently.
        st.warning("Please upload a resume file or paste resume text first.")