# Spaces: Sleeping  (hosting-page status text captured with the source; not part of the program)
import json
from urllib.parse import quote

import pandas as pd
import streamlit as st
from google.cloud import language_v1
from google.oauth2 import service_account
# Build and render a Google Search link for a Knowledge Graph MID
def query_knowledge_graph(entity_id):
    """Render an "Open in Google Search" link for the given MID.

    Args:
        entity_id: Value placed in the ``kgmid`` query parameter of the
            search URL. It is converted with ``str()`` before encoding.

    Returns:
        None. The link is written to the Streamlit page. Any exception raised
        while rendering is reported on the page with ``st.write`` instead of
        being propagated.
    """
    # Percent-encode the identifier so characters such as ")", "&" or spaces
    # cannot break out of the query string or the Markdown link. "/" is kept
    # literal, so values of the form /m/... or /g/... are left unchanged.
    encoded_mid = quote(str(entity_id), safe="/")
    google_search_link = f"https://www.google.com/search?kgmid={encoded_mid}"
    try:
        # A Markdown link does not need raw-HTML rendering, so
        # unsafe_allow_html is left at its default.
        st.markdown(f"[Open in Google Search]({google_search_link})")
    except Exception as e:
        st.write(f"An error occurred: {e}")
# Convert metadata values to strings so the mapping can be serialized
def serialize_entity_metadata(metadata):
    """Return a new dict with the same keys as *metadata* and ``str()`` of each value.

    Args:
        metadata: Mapping exposing ``.items()``.

    Returns:
        dict: Key order follows the iteration order of ``metadata.items()``.
    """
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized
# Count Google entities (those whose MID contains /g/ or /m/)
def count_google_entities(entities):
    """Return how many entities carry a ``mid`` metadata value containing ``/g/`` or ``/m/``.

    Args:
        entities: Iterable of objects with a ``metadata`` mapping attribute.

    Returns:
        int: Number of matching entities; 0 for an empty iterable.
    """
    linked = 0
    for entity in entities:
        meta = entity.metadata
        if 'mid' not in meta:
            continue
        mid = meta['mid']
        if '/g/' in mid or '/m/' in mid:
            linked += 1
    return linked
# Export every entity, whether or not it has a MID
def export_entities(entities):
    """Offer CSV and JSON download buttons for all *entities*.

    Each entity becomes one record with the keys ``Name``, ``Type``,
    ``Salience Score``, ``MID``, ``Metadata`` and ``Mentions``. When there
    are no entities, a short notice is written to the page and no buttons
    are rendered.

    Args:
        entities: Iterable of entity objects exposing ``name``, ``type_``,
            ``salience``, ``metadata`` and ``mentions``.

    Returns:
        None.
    """

    def _to_record(entity):
        # Empty metadata is exported as an empty dict; otherwise values are
        # stringified via serialize_entity_metadata.
        if entity.metadata:
            meta = serialize_entity_metadata(entity.metadata)
        else:
            meta = {}
        return {
            "Name": entity.name,
            "Type": language_v1.Entity.Type(entity.type_).name,
            "Salience Score": entity.salience,
            "MID": meta.get('mid', ''),
            "Metadata": meta,
            "Mentions": [mention.text.content for mention in entity.mentions],
        }

    records = [_to_record(entity) for entity in entities]
    if not records:
        st.write("No entities found to export.")
        return

    # CSV export first, then JSON, each with its own download button.
    csv_data = pd.DataFrame(records).to_csv(index=False)
    st.download_button(
        label="Export Entities as CSV",
        data=csv_data,
        file_name="entities.csv",
        mime="text/csv",
    )

    json_data = json.dumps(records, indent=2)
    st.download_button(
        label="Export Entities as JSON",
        data=json_data,
        file_name="entities.json",
        mime="application/json",
    )
# Sidebar: short description of the tool and usage steps
HOW_TO_USE_STEPS = """
1. **Enter text** in the box below.
2. **Click Analyze** to detect entities.
3. **Export** results to CSV or JSON.
"""

with st.sidebar:
    st.title("About This Tool")
    st.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
    st.markdown("### How to Use")
    st.markdown(HOW_TO_USE_STEPS)

# Header: page title and one-line summary shown above the input box
st.title("Google Cloud NLP Entity Analyzer")
st.write("Analyze text and extract all entities, including those without Google metadata (MID).")
# NLP analysis: call the API, render each entity, then offer exports
def analyze_entities(text_content):
    """Run entity analysis on *text_content* and render the results.

    Builds a Natural Language client from the ``google_nlp`` Streamlit
    secret (parsed as service-account JSON), submits the text as English
    plain text, writes a summary line followed by one section per entity,
    and finally calls ``export_entities`` to offer the downloads.

    Args:
        text_content: Text to analyze.

    Returns:
        None. If loading credentials or calling the API raises, the error is
        shown on the page with ``st.error`` and nothing else is rendered.
    """
    try:
        service_account_info = json.loads(st.secrets["google_nlp"])
        credentials = service_account.Credentials.from_service_account_info(
            service_account_info,
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
        client = language_v1.LanguageServiceClient(credentials=credentials)
        document = {
            "content": text_content,
            "type_": language_v1.Document.Type.PLAIN_TEXT,
            "language": "en",
        }
        encoding_type = language_v1.EncodingType.UTF8
        response = client.analyze_entities(
            request={"document": document, "encoding_type": encoding_type}
        )
    except Exception as e:
        # UI boundary: report the failure on the page (as
        # query_knowledge_graph does) instead of surfacing a raw traceback.
        st.error(f"An error occurred: {e}")
        return

    entities = response.entities
    total_entities = len(entities)
    google_entities = count_google_entities(entities)

    # The separator below was a mis-decoded character in the original
    # strings; an em dash is assumed to be what was intended.
    if google_entities == 0:
        st.markdown(f"### Found {total_entities} entities — no Google-linked (MID) entities found.")
    else:
        st.markdown(f"### Found {total_entities} entities — {google_entities} Google-linked entities with MID.")
    st.write("---")

    for position, entity in enumerate(entities, start=1):
        st.write(f"**Entity {position} of {total_entities}**")
        st.write(f"**Name:** {entity.name}")
        st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"**Salience Score:** {entity.salience:.4f}")

        if entity.metadata:
            # Copy into a plain dict so st.json receives an ordinary mapping
            # rather than the API's own map container (presumably not
            # directly JSON-serializable; the copy is harmless either way).
            metadata = dict(entity.metadata)
            st.write("**Metadata:**")
            st.json(metadata)
            # Only MIDs containing /g/ or /m/ get a Knowledge Graph link.
            mid = metadata.get('mid', '')
            if '/g/' in mid or '/m/' in mid:
                query_knowledge_graph(mid)
        else:
            st.write("_No metadata available_")

        if entity.mentions:
            st.write(f"**Mentions ({len(entity.mentions)}):**")
            st.write([mention.text.content for mention in entity.mentions])
        st.write("---")

    export_entities(entities)
# Text input and Analyze button: run the analysis only for non-blank text
user_input = st.text_area("Enter text to analyze")
analyze_clicked = st.button("Analyze")
if analyze_clicked and not user_input.strip():
    # Whitespace-only input is treated as empty.
    st.warning("Please enter some text before clicking Analyze.")
elif analyze_clicked:
    analyze_entities(user_input)