# Spaces: Sleeping
# (page-status residue captured along with this file; not part of the program)
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import re | |
import pickle | |
import nltk | |
from nltk.corpus import stopwords | |
from tensorflow.keras.models import load_model | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
# Build the English stopword set used by cleanResume.
# Streamlit re-executes this script on every user interaction, so only reach
# for the network when the corpus is actually missing from the local NLTK
# data directory (NLTK signals a missing resource with LookupError).
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
def cleanResume(resumeText):
    """Normalize raw resume text before tokenization.

    The substitutions run in a fixed order:

    1. URLs (``http...`` plus trailing whitespace) become a space.
    2. The literal sequences ``RT`` and ``cc`` become a space.
    3. Hashtags (``#`` plus following non-space characters) are deleted.
    4. Mentions (``@`` plus following non-space characters) become a space.
    5. ASCII punctuation becomes a space.
    6. Non-ASCII characters become a space.
    7. Runs of whitespace collapse to a single space.

    Finally, words whose lowercase form is in the module-level ``stop_words``
    set are dropped and the rest are joined with single spaces.

    Args:
        resumeText: Raw resume text as a ``str``.

    Returns:
        The cleaned text; an empty string when nothing survives cleaning.
    """
    substitutions = (
        (r'http\S+\s*', ' '),
        # NOTE(review): this pattern is not word-bounded, so it also hits
        # "cc"/"RT" inside ordinary words (e.g. "account"). Left unchanged
        # because the pickled tokenizer was presumably fitted on text cleaned
        # the same way -- confirm against the training code before tightening.
        (r'RT|cc', ' '),
        (r'#\S+', ''),
        (r'@\S+', ' '),
        (r'[%s]' % re.escape("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"), ' '),
        (r'[^\x00-\x7f]', r' '),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        resumeText = re.sub(pattern, replacement, resumeText)

    # Stopword matching is case-insensitive, but surviving words keep their
    # original casing.
    kept_words = [
        word for word in resumeText.split()
        if word.lower() not in stop_words
    ]
    return ' '.join(kept_words)
# Streamlit re-runs the whole script on every interaction; st.cache_resource
# keeps a single loaded copy of the artifacts alive across those reruns.
# Streamlit releases that predate cache_resource fall back to the previous
# behaviour of loading from disk on every call.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def load_resources():
    """Load the tokenizer, label encoder and Keras model from disk.

    Reads ``tokenizer.pkl``, ``label_encoder.pkl`` and
    ``deeprank_model_v2.h5`` relative to the current working directory.
    When ``st.cache_resource`` is available the same loaded objects are
    returned on subsequent calls instead of being re-read.

    Returns:
        A ``(tokenizer, label_encoder, model)`` tuple.

    Raises:
        FileNotFoundError: If either pickle file is missing (raised by
            ``open``).
    """
    # SECURITY: pickle.load executes code embedded in the file. These two
    # artifacts must only ever come from a trusted source (shipped with the
    # app), never from user uploads.
    with open('tokenizer.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    with open('label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)
    model = load_model('deeprank_model_v2.h5')
    return tokenizer, label_encoder, model
def infer(text, tokenizer, label_encoder, model, maxlen=500):
    """Predict the category label for a single resume text.

    The text is cleaned with ``cleanResume``, converted to an integer
    sequence by ``tokenizer``, padded to ``maxlen`` and passed to
    ``model.predict``. The index of the highest score is mapped back to a
    label with ``label_encoder.inverse_transform``.

    Args:
        text: Raw resume text.
        tokenizer: Object exposing ``texts_to_sequences``.
        label_encoder: Object exposing ``inverse_transform``.
        model: Object exposing ``predict``.
        maxlen: Sequence length passed to ``pad_sequences``. Defaults to 500;
            presumably this must equal the length the model was trained
            with -- confirm before overriding.

    Returns:
        The predicted label (first element of the inverse-transformed result).
    """
    cleaned_text = cleanResume(text)
    sequences = tokenizer.texts_to_sequences([cleaned_text])
    padded = pad_sequences(sequences, maxlen=maxlen)
    scores = model.predict(padded)
    # argmax without an axis works on the flattened output; with exactly one
    # input row that is the position of the top score within that row
    # (assumes the model emits one score per class -- TODO confirm).
    best_index = np.argmax(scores)
    predicted_class = label_encoder.inverse_transform([best_index])
    return predicted_class[0]
# ---- Streamlit page: collect resume text, then predict on demand ----
st.title("Resume Category Predictor")
st.write("Upload a resume text file or enter text below to predict the job category.")

uploaded_file = st.file_uploader("Upload Resume (TXT file)", type=["txt"])
user_input = st.text_area("Or paste resume text here:")

# An uploaded file takes precedence over pasted text.
if uploaded_file is not None:
    # getvalue() returns the whole buffer regardless of the current read
    # position. Undecodable bytes are replaced rather than raising
    # UnicodeDecodeError; the replacement character is non-ASCII and is
    # stripped by cleanResume along with all other non-ASCII characters.
    resume_text = uploaded_file.getvalue().decode("utf-8", errors="replace")
    st.session_state["resume_text"] = resume_text
elif user_input:
    resume_text = user_input
    st.session_state["resume_text"] = resume_text
else:
    resume_text = ""

# The button is always rendered; prediction only runs when there is text.
if st.button("Predict Category"):
    if resume_text.strip():
        tokenizer, label_encoder, model = load_resources()
        prediction = infer(resume_text, tokenizer, label_encoder, model)
        st.write(f"Predicted Category: **{prediction}**")
    else:
        # Give feedback instead of silently ignoring the click.
        st.warning("Please upload a resume file or paste resume text first.")