# Spaces: Sleeping
# Import necessary libraries
import re

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
import textrazor
# Column order of the entity table shown in the app and written to CSV.
ENTITY_COLUMNS = ['Entity ID', 'Entity Relevance', 'Entity Confidence', 'Entity Freebase']

# Compiled once; used to strip every run of digits from an entity id.
_DIGITS = re.compile(r'\d+')


def build_entity_frame(entities) -> pd.DataFrame:
    """Return one row per entity with the columns in ``ENTITY_COLUMNS``.

    Args:
        entities: Iterable of objects exposing ``id``, ``relevance_score``,
            ``confidence_score`` and ``freebase_types`` attributes.

    Returns:
        A DataFrame in input order. Digits are removed from each id; the
        other three attributes are copied through unchanged. An empty
        iterable yields an empty frame that still has all four columns.
    """
    rows = [
        (
            # Fall back to "" when the id is missing so the substitution
            # cannot raise TypeError on None.
            _DIGITS.sub('', entity.id or ''),
            entity.relevance_score,
            entity.confidence_score,
            entity.freebase_types,
        )
        for entity in entities
    ]
    return pd.DataFrame(rows, columns=ENTITY_COLUMNS)


def top_entity_counts(df: pd.DataFrame, limit: int = 10) -> pd.Series:
    """Return the ``limit`` most frequent 'Entity ID' values and their counts."""
    return df['Entity ID'].value_counts().nlargest(limit)


def main() -> None:
    """Render the app: read a URL, analyze it with TextRazor, show the results."""
    # Set TextRazor API key using Streamlit Secrets
    textrazor.api_key = st.secrets["textrazer_api"]

    # Streamlit app title and input for URL
    st.title("TextRazor Entities Analysis")
    url_input = st.text_input("Enter URL to Analyze").strip()

    # Nothing to do until a URL is provided
    if not url_input:
        st.warning("Please enter a URL to analyze.")
        return

    # Create TextRazor client and analyze URL
    client = textrazor.TextRazor(extractors=["entities", "topics"])
    try:
        response = client.analyze_url(url_input)
    except textrazor.TextRazorAnalysisException as exc:
        # Report the failure in the page instead of crashing the app.
        st.error(f"TextRazor could not analyze this URL: {exc}")
        return

    df = build_entity_frame(response.entities())

    # Save data to CSV
    df.to_csv('textrazor_v1_entities.csv')

    # Display dataframe (rows are in response order; no ranking is applied)
    st.write("Top 25 Entities:")
    st.write(df.head(25))

    counts = top_entity_counts(df)
    if counts.empty:
        st.info("No entities were found for this URL.")
        return

    # Plot the top 10 entities on an explicit figure so the labels and title
    # are attached to the same axes that hold the bars.
    fig, ax = plt.subplots()
    ax.bar(counts.index, counts.values)
    ax.set_xlabel('Entity')
    ax.set_ylabel('Frequency')
    ax.set_title('Top 10 Entities by frequency')
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()

    # Show the plot in Streamlit, then release it so figures do not pile up
    # across script reruns.
    st.pyplot(fig)
    plt.close(fig)


# Streamlit executes the script as ``__main__``; the guard keeps a plain
# import of this module free of side effects.
if __name__ == "__main__":
    main()