Spaces:
Sleeping
Sleeping
File size: 1,827 Bytes
b5de194 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# Import necessary libraries
import streamlit as st
import textrazor
import matplotlib.pyplot as plt
import pandas as pd
import re
# Streamlit page: send a user-supplied URL to TextRazor, tabulate the
# entities it returns, save them to CSV, and chart the most frequent ones.

# Set TextRazor API key using Streamlit Secrets
textrazor.api_key = st.secrets["textrazer_api"]

# Streamlit app title and input for URL
st.title("TextRazor Entities Analysis")
url_input = st.text_input("Enter URL to Analyze")

# Check if URL is provided
if url_input:
    # Create TextRazor client and analyze URL
    client = textrazor.TextRazor(extractors=["entities", "topics"])
    try:
        response = client.analyze_url(url_input)
    except textrazor.TextRazorAnalysisException as exc:
        # Show the failure in the page instead of a raw traceback, then end
        # this script run so nothing below touches the missing response.
        st.error(f"TextRazor could not analyze this URL: {exc}")
        st.stop()

    # Build one row per returned entity. Digits are stripped from the
    # entity ID before it is stored (same regex as before).
    columns = ['Entity ID', 'Entity Relevance', 'Entity Confidence', 'Entity Freebase']
    df = pd.DataFrame(
        [
            {
                'Entity ID': re.sub(r'\d+', '', entity.id),
                'Entity Relevance': entity.relevance_score,
                'Entity Confidence': entity.confidence_score,
                'Entity Freebase': entity.freebase_types,
            }
            for entity in response.entities()
        ],
        columns=columns,
    )

    # Save data to CSV
    df.to_csv('textrazor_v1_entities.csv')

    # Display dataframe
    st.write("Top 25 Entities:")
    st.write(df.head(25))

    # Plot the top 10 entities by how often each ID occurs in the table
    top_entities = df['Entity ID'].value_counts().nlargest(10)
    if top_entities.empty:
        st.info("No entities were found for this URL.")
    else:
        # Draw on an explicit figure so the labels and title land on the
        # same chart as the bars. Previously the bars went to st.bar_chart
        # while the labels went to an unrelated, empty pyplot figure.
        fig, ax = plt.subplots()
        ax.bar(top_entities.index.astype(str), top_entities.to_numpy())
        ax.set_xlabel('Entity')
        ax.set_ylabel('Frequency')
        ax.set_title('Top 10 Entities by frequency')
        ax.tick_params(axis='x', labelrotation=45)
        fig.tight_layout()
        # Show the plot in Streamlit
        st.pyplot(fig)
        # Release the figure so reruns of the script do not accumulate them.
        plt.close(fig)
else:
    st.warning("Please enter a URL to analyze.")
|