Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| import pickle | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from tensorflow.keras.models import load_model | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
# Build the English stop-word set used by cleanResume.
# Streamlit re-executes this script on every user interaction, so the corpus is
# downloaded only when the local lookup fails instead of calling
# nltk.download() unconditionally on each run.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    # Corpus not installed yet: fetch it once, then retry the lookup.
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
def cleanResume(resumeText):
    """Normalise raw resume text before it is tokenised.

    Applies these regex substitutions in order: URLs, the literal substrings
    ``RT`` and ``cc``, hashtags, @-mentions, ASCII punctuation, non-ASCII
    characters, and finally runs of whitespace. Afterwards every word whose
    lower-cased form is in the module-level ``stop_words`` set is dropped and
    the remaining words are joined with single spaces.

    NOTE(review): ``RT|cc`` has no word boundaries, so it also matches inside
    words (e.g. the "cc" in "account"). Left untouched because the saved
    tokenizer/model were presumably built with this exact cleaning -- confirm
    before changing it.

    Args:
        resumeText: Raw resume text.

    Returns:
        The cleaned text as a single space-separated string.
    """
    # (pattern, replacement) pairs; order matters because later patterns
    # operate on the output of earlier ones.
    substitutions = (
        (r'http\S+\s*', ' '),
        (r'RT|cc', ' '),
        (r'#\S+', ''),
        (r'@\S+', ' '),
        (r'[%s]' % re.escape("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"), ' '),
        (r'[^\x00-\x7f]', r' '),
        (r'\s+', ' '),
    )
    cleaned = resumeText
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    kept_words = []
    for token in cleaned.split():
        if token.lower() in stop_words:
            continue
        kept_words.append(token)
    return ' '.join(kept_words)
@st.cache_resource
def load_resources():
    """Load the tokenizer, label encoder and Keras model from disk.

    Decorated with ``st.cache_resource`` so the three artifacts are loaded
    once and reused across Streamlit reruns, instead of being re-read from
    disk every time the predict button is clicked.

    The files ``tokenizer.pkl``, ``label_encoder.pkl`` and
    ``deeprank_model_v2.h5`` are opened by relative path, i.e. resolved
    against the current working directory.

    Returns:
        tuple: ``(tokenizer, label_encoder, model)``.

    Raises:
        FileNotFoundError: If one of the pickle files is missing (raised by
            ``open``).
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file; these
    # artifacts must only ever come from a trusted source.
    with open('tokenizer.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    with open('label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)
    model = load_model('deeprank_model_v2.h5')
    return tokenizer, label_encoder, model
def infer(text, tokenizer, label_encoder, model):
    """Predict the category label for a single resume.

    The text is cleaned with ``cleanResume``, turned into one index sequence
    by ``tokenizer.texts_to_sequences``, passed through ``pad_sequences`` with
    ``maxlen=500`` and scored by ``model.predict``. The arg-max index of the
    prediction is mapped back to a label via
    ``label_encoder.inverse_transform``.

    Args:
        text: Raw resume text.
        tokenizer: Object providing ``texts_to_sequences``.
        label_encoder: Object providing ``inverse_transform``.
        model: Object providing ``predict``.

    Returns:
        The first (and only) element returned by ``inverse_transform``.
    """
    # A one-element batch: the model is scored on a single resume.
    sequences = tokenizer.texts_to_sequences([cleanResume(text)])
    scores = model.predict(pad_sequences(sequences, maxlen=500))
    best_index = np.argmax(scores)
    labels = label_encoder.inverse_transform([best_index])
    return labels[0]
# ---- Streamlit page: collect resume text, then predict on demand ----
st.title("Resume Category Predictor")
st.write("Upload a resume text file or enter text below to predict the job category.")

uploaded_file = st.file_uploader("Upload Resume (TXT file)", type=["txt"])
user_input = st.text_area("Or paste resume text here:")

# An uploaded file takes precedence over pasted text.
if uploaded_file is not None:
    # getvalue() returns the whole buffer regardless of the current read
    # position, so the text is still available when the script reruns after a
    # button click. Undecodable bytes are replaced instead of raising
    # UnicodeDecodeError; cleanResume replaces non-ASCII characters with
    # spaces anyway.
    resume_text = uploaded_file.getvalue().decode("utf-8", errors="replace")
    st.session_state["resume_text"] = resume_text
elif user_input:
    resume_text = user_input
    st.session_state["resume_text"] = resume_text
else:
    resume_text = ""

if st.button("Predict Category"):
    if resume_text.strip():
        tokenizer, label_encoder, model = load_resources()
        prediction = infer(resume_text, tokenizer, label_encoder, model)
        st.write(f"Predicted Category: **{prediction}**")
    else:
        # Tell the user why nothing happened instead of silently ignoring the click.
        st.warning("Please upload a resume file or paste resume text first.")