File size: 1,633 Bytes
5bea701 5a5c182 c40c6c3 88d066d 040362f 5bea701 9ac410d 88d066d 9ac410d 23fd868 aa023ef 23fd868 aa023ef 319dddf 6e78c7b 23fd868 6e78c7b 7a0c57e d6ea9fc 23fd868 6e78c7b 23fd868 6e78c7b 1d303e7 23fd868 1d303e7 79bc629 88d066d c40c6c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gliner import GLiNER
import streamlit as st
import pandas as pd
from PyPDF2 import PdfReader
from gliner import GLiNER
# Streamlit entry point: let the user upload one or more PDFs, extract each
# file's text, run GLiNER entity extraction over it, and show one table row
# per successfully processed file.
uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf",
    accept_multiple_files=True,
    type="pdf",
)

if uploaded_files:
    all_data = []  # One dict per PDF: the raw text plus entity texts grouped by label.

    # Loop invariants: the label set never changes and the model only needs to
    # be loaded once for all uploaded files rather than once per file.
    labels = ["person", "country", "organization", "time", "role"]
    model = None

    for uploaded_file in uploaded_files:
        try:
            # Lazy, one-time load. Kept inside the try so a load failure is
            # still reported through st.error for the file being processed
            # instead of aborting the whole script.
            if model is None:
                model = GLiNER.from_pretrained("urchade/gliner_base")

            pdf_reader = PdfReader(uploaded_file)
            # `or ""` guards against a page yielding no text (None), which
            # would otherwise raise TypeError during concatenation.
            text_data = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

            entities = model.predict_entities(text_data, labels)

            # Group the extracted entity texts under their label; labels with
            # no matches map to an empty list so every row has every column.
            entity_dict = {
                label: [
                    entity["text"]
                    for entity in entities
                    if entity["label"] == label
                ]
                for label in labels
            }

            all_data.append({"Text": text_data, **entity_dict})
        except Exception as e:
            # Top-level UI boundary: report the failure for this file and
            # continue with the remaining uploads.
            st.error(f"Error processing file {uploaded_file.name}: {e}")

    if all_data:
        df = pd.DataFrame(all_data)
        st.dataframe(df)
|