import streamlit as st
import pandas as pd
import numpy as np
import re
import pickle
import nltk
from nltk.corpus import stopwords
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
nltk.download('stopwords')  # ensure the NLTK stop-word list is available
stop_words = set(stopwords.words('english'))
def cleanResume(resumeText):
    """Strip URLs, mentions, hashtags, punctuation, non-ASCII characters and stop words."""
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)  # remove URLs
    resumeText = re.sub(r'RT|cc', ' ', resumeText)  # remove RT and cc
    resumeText = re.sub(r'#\S+', '', resumeText)  # remove hashtags
    resumeText = re.sub(r'@\S+', ' ', resumeText)  # remove mentions
    resumeText = re.sub(r'[%s]' % re.escape("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"), ' ', resumeText)  # remove punctuation
    resumeText = re.sub(r'[^\x00-\x7f]', r' ', resumeText)  # remove non-ASCII characters
    resumeText = re.sub(r'\s+', ' ', resumeText)  # collapse repeated whitespace
    resumeText = ' '.join([word for word in resumeText.split() if word.lower() not in stop_words])  # drop stop words
    return resumeText
def load_resources():
    """Load the fitted tokenizer, label encoder and trained Keras model from disk."""
    # Note: wrapping this function with @st.cache_resource would avoid reloading
    # the model on every button click.
    with open('tokenizer.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    with open('label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)
    model = load_model('deeprank_model_v2.h5')
    return tokenizer, label_encoder, model
def infer(text, tokenizer, label_encoder, model):
    """Clean the text, tokenize and pad it, and return the predicted category label."""
    cleaned_text = cleanResume(text)
    sequence = tokenizer.texts_to_sequences([cleaned_text])
    padded_sequence = pad_sequences(sequence, maxlen=500)  # maxlen must match the length used in training
    prediction = model.predict(padded_sequence)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_class[0]
st.title("Resume Category Predictor")
st.write("Upload a resume text file or enter text below to predict the job category.")
uploaded_file = st.file_uploader("Upload Resume (TXT file)", type=["txt"])
user_input = st.text_area("Or paste resume text here:")
if uploaded_file is not None:
    resume_text = uploaded_file.read().decode("utf-8")
    st.session_state["resume_text"] = resume_text
elif user_input:
    resume_text = user_input
    st.session_state["resume_text"] = resume_text
else:
    resume_text = ""
if st.button("Predict Category") and resume_text:
    tokenizer, label_encoder, model = load_resources()
    prediction = infer(resume_text, tokenizer, label_encoder, model)
    st.write(f"Predicted Category: **{prediction}**")
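
# A minimal sketch of how to run this app locally, assuming the file is saved as
# app.py (the file name is an assumption) and that tokenizer.pkl, label_encoder.pkl
# and deeprank_model_v2.h5 (the artifacts referenced in load_resources above) sit in
# the same directory:
#
#   pip install streamlit tensorflow nltk pandas numpy
#   streamlit run app.py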