DeepRank / app.py
billusanda007's picture
Update app.py
312d081 verified
raw
history blame
2.33 kB
import streamlit as st
import pandas as pd
import numpy as np
import re
import pickle
import nltk
from nltk.corpus import stopwords
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Make sure the NLTK English stop-word corpus is available locally.
# Streamlit re-executes this script on every user interaction, so the corpus
# is downloaded only when the lookup fails instead of calling the downloader
# on every rerun.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Set of English stop words used by cleanResume for O(1) membership tests.
stop_words = set(stopwords.words('english'))
def cleanResume(resumeText):
    """Return ``resumeText`` stripped of noise and English stop words.

    The following substitutions are applied in order:

    1. ``http...`` URLs (and trailing whitespace) become a single space.
    2. The literal sequences ``RT`` and ``cc`` become a space.  The pattern
       has no word boundaries, so it also matches inside words
       (``account`` -> ``a ount``).
       NOTE(review): presumably this must stay identical to the
       preprocessing used when the tokenizer/model were fitted - confirm
       before changing it.
    3. ``#hashtags`` are removed (no space is left behind).
    4. ``@mentions`` become a space.
    5. ASCII punctuation characters become a space.
    6. Non-ASCII characters become a space.
    7. Runs of whitespace collapse to a single space.

    Finally, words whose lowercase form is in the module-level
    ``stop_words`` set are dropped; the remaining words keep their original
    casing and are joined with single spaces.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        (r'http\S+\s*', ' '),
        (r'RT|cc', ' '),
        (r'#\S+', ''),
        (r'@\S+', ' '),
        (r'[%s]' % re.escape("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"), ' '),
        (r'[^\x00-\x7f]', r' '),
        (r'\s+', ' '),
    )

    cleaned = resumeText
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return ' '.join(
        token for token in cleaned.split() if token.lower() not in stop_words
    )
def load_resources():
    """Load the inference artefacts from the current working directory.

    Reads ``tokenizer.pkl`` and ``label_encoder.pkl`` with ``pickle`` and
    ``deeprank_model_v2.h5`` with Keras ``load_model``.

    Returns:
        A ``(tokenizer, label_encoder, model)`` tuple.
    """
    # NOTE: pickle.load executes arbitrary code embedded in the file; these
    # paths must only ever point at artefacts shipped with the app.
    unpickled = []
    for pickle_path in ('tokenizer.pkl', 'label_encoder.pkl'):
        with open(pickle_path, 'rb') as handle:
            unpickled.append(pickle.load(handle))
    tokenizer, label_encoder = unpickled

    return tokenizer, label_encoder, load_model('deeprank_model_v2.h5')
def infer(text, tokenizer, label_encoder, model):
    """Predict the category label for a single resume.

    Args:
        text: Raw resume text; it is cleaned with ``cleanResume`` first.
        tokenizer: Object providing ``texts_to_sequences``.
        label_encoder: Object providing ``inverse_transform``.
        model: Object providing ``predict``.

    Returns:
        The first element of ``label_encoder.inverse_transform`` applied to
        the index of the largest value in the model's prediction.
    """
    token_ids = tokenizer.texts_to_sequences([cleanResume(text)])
    model_input = pad_sequences(token_ids, maxlen=500)
    scores = model.predict(model_input)

    # np.argmax is called without an axis, so it indexes into the flattened
    # output; assumes the prediction holds a single row - TODO confirm.
    best_index = np.argmax(scores)
    labels = label_encoder.inverse_transform([best_index])
    return labels[0]
# ---------------------------------------------------------------------------
# Streamlit page: collect resume text (upload or paste) and show the
# predicted category when the button is pressed.
# ---------------------------------------------------------------------------
st.title("Resume Category Predictor")
st.write("Upload a resume text file or enter text below to predict the job category.")

uploaded_file = st.file_uploader("Upload Resume (TXT file)", type=["txt"])
user_input = st.text_area("Or paste resume text here:")

# An uploaded file takes precedence over pasted text.
if uploaded_file is not None:
    # errors="replace" keeps a non-UTF-8 upload from crashing the app with
    # UnicodeDecodeError; the replacement character is non-ASCII and is
    # turned into a space by cleanResume anyway.
    resume_text = uploaded_file.read().decode("utf-8", errors="replace")
    st.session_state["resume_text"] = resume_text
elif user_input:
    resume_text = user_input
    st.session_state["resume_text"] = resume_text
else:
    resume_text = ""

if st.button("Predict Category"):
    if resume_text.strip():
        # Loading the pickles and the Keras model on every click is slow.
        # Reuse them across reruns when this Streamlit version provides
        # st.cache_resource; otherwise fall back to loading directly.
        cache_resource = getattr(st, "cache_resource", None)
        cached_loader = (
            cache_resource(load_resources) if cache_resource else load_resources
        )
        tokenizer, label_encoder, model = cached_loader()
        prediction = infer(resume_text, tokenizer, label_encoder, model)
        st.write(f"Predicted Category: **{prediction}**")
    else:
        # Tell the user why nothing happened instead of silently ignoring
        # the click when there is no (or only whitespace) input.
        st.warning("Please upload a resume file or paste resume text first.")