Spaces:

shrut27
/

ESG_Report_Analysis

Build error

App Files Community

shrut27 committed on Mar 6, 2024

Commit

2083211

verified ·

1 Parent(s): c4426e9

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -9

app.py CHANGED Viewed

@@ -8,28 +8,28 @@ import pandas as pd
 # Loading spaCy model outside the streamlit cache
 nlp = spacy.load("en_core_web_sm")
-@st.cache(allow_output_mutation=True)
 def load_environmental_model():
     """Return a text-classification pipeline for the environmental checkpoint.

     The tokenizer and the sequence-classification model are both loaded from
     the "ESGBERT/EnvironmentalBERT-environmental" checkpoint and handed to
     ``pipeline``.
     """
     checkpoint = "ESGBERT/EnvironmentalBERT-environmental"
     # Dict entries are evaluated top to bottom: tokenizer first, then model.
     components = {
         "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
         "model": AutoModelForSequenceClassification.from_pretrained(checkpoint),
     }
     return pipeline("text-classification", **components)
-@st.cache(allow_output_mutation=True)
 def load_social_model():
     """Return a text-classification pipeline for the social checkpoint.

     The tokenizer and the sequence-classification model are both loaded from
     the "ESGBERT/SocialBERT-social" checkpoint and handed to ``pipeline``.
     """
     checkpoint = "ESGBERT/SocialBERT-social"
     # Dict entries are evaluated top to bottom: tokenizer first, then model.
     components = {
         "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
         "model": AutoModelForSequenceClassification.from_pretrained(checkpoint),
     }
     return pipeline("text-classification", **components)
-@st.cache(allow_output_mutation=True)
 def load_governance_model():
     """Return a text-classification pipeline for the governance checkpoint.

     The tokenizer and the sequence-classification model are both loaded from
     the "ESGBERT/GovernanceBERT-governance" checkpoint and handed to
     ``pipeline``.
     """
     checkpoint = "ESGBERT/GovernanceBERT-governance"
     # Dict entries are evaluated top to bottom: tokenizer first, then model.
     components = {
         "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
         "model": AutoModelForSequenceClassification.from_pretrained(checkpoint),
     }
     return pipeline("text-classification", **components)
-@st.cache(allow_output_mutation=True)
 def load_sentiment_model():
     model_name = "climatebert/distilroberta-base-climate-sentiment"
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
@@ -37,12 +37,14 @@ def load_sentiment_model():
     return pipeline("text-classification", model=model, tokenizer=tokenizer)
 # Streamlit App
-st.title("ESGBERT Text Classification App")
 # Get report URL from user input
 url = st.text_input("Enter the URL of the report (PDF):")
 # Model selection dropdown
 selected_model = st.selectbox("Select Model", ["Environmental Model", "Social Model", "Governance Model", "Sentiment Model"])
 if url:
@@ -52,18 +54,15 @@ if url:
     if response.status_code == 200:
         # Parse PDF and extract text
         raw_text = parser.from_buffer(response.content)['content']
         # Extract sentences using spaCy
         doc = nlp(raw_text)
         sentences = [sent.text for sent in doc.sents]
         # Filtering and preprocessing sentences
         sequences = list(map(str, sentences))
         sentences = [x.replace("\n", "") for x in sequences]
         sentences = [x for x in sentences if x != ""]
         sentences = [x for x in sentences if x[0].isupper()]
-        sub_sentences = sentences[:100]  # Takes around 20 seconds
         # Classification using different models based on user selection
         if selected_model == "Environmental Model":
             pipe_model = load_environmental_model()

 # Loading spaCy model outside the streamlit cache
 nlp = spacy.load("en_core_web_sm")
+@st.cache_resource()
 def load_environmental_model():
     """Return a text-classification pipeline for the environmental checkpoint.

     The tokenizer and the sequence-classification model are both loaded from
     the "ESGBERT/EnvironmentalBERT-environmental" checkpoint and handed to
     ``pipeline``.
     """
     checkpoint = "ESGBERT/EnvironmentalBERT-environmental"
     # Dict entries are evaluated top to bottom: tokenizer first, then model.
     components = {
         "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
         "model": AutoModelForSequenceClassification.from_pretrained(checkpoint),
     }
     return pipeline("text-classification", **components)
+@st.cache_resource()
 def load_social_model():
     """Return a text-classification pipeline for the social checkpoint.

     The tokenizer and the sequence-classification model are both loaded from
     the "ESGBERT/SocialBERT-social" checkpoint and handed to ``pipeline``.
     """
     checkpoint = "ESGBERT/SocialBERT-social"
     # Dict entries are evaluated top to bottom: tokenizer first, then model.
     components = {
         "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
         "model": AutoModelForSequenceClassification.from_pretrained(checkpoint),
     }
     return pipeline("text-classification", **components)
+@st.cache_resource()
 def load_governance_model():
     """Return a text-classification pipeline for the governance checkpoint.

     The tokenizer and the sequence-classification model are both loaded from
     the "ESGBERT/GovernanceBERT-governance" checkpoint and handed to
     ``pipeline``.
     """
     checkpoint = "ESGBERT/GovernanceBERT-governance"
     # Dict entries are evaluated top to bottom: tokenizer first, then model.
     components = {
         "tokenizer": AutoTokenizer.from_pretrained(checkpoint),
         "model": AutoModelForSequenceClassification.from_pretrained(checkpoint),
     }
     return pipeline("text-classification", **components)
+@st.cache_resource()
 def load_sentiment_model():
     model_name = "climatebert/distilroberta-base-climate-sentiment"
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
     return pipeline("text-classification", model=model, tokenizer=tokenizer)
 # Streamlit App
+st.title("ESG Report Classification using Natural Language Processing")
 # Get report URL from user input
 url = st.text_input("Enter the URL of the report (PDF):")
 # Model selection dropdown
+st.write("Environmental Model, Social Model, Governance Model would give the percentage denoting the parameter chosen.")
+st.write("Sentiment Model shows if the company is a risk or opportunity based on all 3 parameters.")
 selected_model = st.selectbox("Select Model", ["Environmental Model", "Social Model", "Governance Model", "Sentiment Model"])
 if url:
     if response.status_code == 200:
         # Parse PDF and extract text
         raw_text = parser.from_buffer(response.content)['content']
         # Extract sentences using spaCy
         doc = nlp(raw_text)
         sentences = [sent.text for sent in doc.sents]
         # Filtering and preprocessing sentences
         sequences = list(map(str, sentences))
         sentences = [x.replace("\n", "") for x in sequences]
         sentences = [x for x in sentences if x != ""]
         sentences = [x for x in sentences if x[0].isupper()]
+        sub_sentences = sentences[:100]
         # Classification using different models based on user selection
         if selected_model == "Environmental Model":
             pipe_model = load_environmental_model()