File size: 1,633 Bytes
5bea701
 
5a5c182
c40c6c3
88d066d
040362f
5bea701
9ac410d
 
 
 
 
88d066d
 
9ac410d
23fd868
 
 
 
 
aa023ef
23fd868
aa023ef
319dddf
6e78c7b
23fd868
6e78c7b
 
 
 
 
 
 
7a0c57e
d6ea9fc
23fd868
6e78c7b
23fd868
 
 
 
 
 
6e78c7b
 
 
1d303e7
23fd868
 
 
 
1d303e7
79bc629
88d066d
c40c6c3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gliner import GLiNER


import streamlit as st
import pandas as pd
from PyPDF2 import PdfReader
from gliner import GLiNER

# Streamlit entry point: let the user upload one or more PDFs, extract their
# text, run named-entity extraction on each, and show the results as a table.
uploaded_files = st.file_uploader(
    "Choose a PDF file(s) and job description as pdf", accept_multiple_files=True, type="pdf"
)

if uploaded_files:
    # Entity types requested from the model; each one becomes a DataFrame column.
    labels = ["person", "country", "organization", "time", "role"]

    # Load the model once per script run rather than once per uploaded file:
    # the load does not depend on the file being processed.
    # NOTE(review): Streamlit re-executes this script on every interaction, so
    # wrapping the load in a cached helper (e.g. st.cache_resource, if the
    # deployed Streamlit version provides it) would avoid reloading entirely.
    try:
        model = GLiNER.from_pretrained("urchade/gliner_base")
    except Exception as e:
        # Without a model nothing below can work; report it once and halt this run.
        st.error(f"Error loading entity recognition model: {e}")
        st.stop()

    all_data = []  # One dict (raw text + entities per label) for each processed PDF
    for uploaded_file in uploaded_files:
        try:
            pdf_reader = PdfReader(uploaded_file)
            # `or ""` keeps a page that yields no text (None or empty) from
            # breaking the concatenation.
            text_data = "".join(page.extract_text() or "" for page in pdf_reader.pages)

            entities = model.predict_entities(text_data, labels)

            # Group the extracted entity strings by label; labels with no match
            # still get an (empty) list so every row has the same columns.
            entity_dict = {
                label: [entity["text"] for entity in entities if entity["label"] == label]
                for label in labels
            }

            data = {"Text": text_data, **entity_dict}
            all_data.append(data)

        except Exception as e:
            # Best-effort batch: report the failing file and keep going with the rest.
            st.error(f"Error processing file {uploaded_file.name}: {e}")

    # Only render a table when at least one file was processed successfully.
    if all_data:
        df = pd.DataFrame(all_data)
        st.dataframe(df)