# NOTE: removed extraction artifacts that preceded the source proper
# (a "File size" header, git-blame commit hashes, and a line-number gutter);
# they were not part of the Python program.
import gradio as gr
import json
import re
from google.cloud import language_v1
from google.oauth2 import service_account
import os
import pandas as pd
# Initialize the Google Cloud Natural Language client
def init_client():
    """Create a Google Cloud Natural Language client.

    Returns the client on success. On failure, returns a
    ``(None, error_message)`` tuple — callers distinguish the two
    outcomes by checking ``isinstance(result, tuple)``.
    """
    try:
        # Hugging Face Spaces-friendly path: the full service-account JSON
        # is stored in the GOOGLE_APPLICATION_CREDENTIALS_JSON secret.
        creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')
        if creds_json is not None:
            info = json.loads(creds_json)
            credentials = service_account.Credentials.from_service_account_info(info)
            return language_v1.LanguageServiceClient(credentials=credentials)
        # Otherwise fall back to Application Default Credentials
        # (e.g. GOOGLE_APPLICATION_CREDENTIALS pointing at a key file).
        return language_v1.LanguageServiceClient()
    except Exception as e:
        return None, str(e)
def classify_text(client, text_content):
    """Classify *text_content* with the Natural Language API V2 model.

    Returns a list of ``{'category', 'confidence'}`` dicts (confidence
    rounded to 4 places), or a single-element ``[{'error': message}]``
    list when the API call fails.
    """
    try:
        document = language_v1.Document(
            content=text_content,
            type_=language_v1.Document.Type.PLAIN_TEXT,
        )
        # Explicitly opt in to the V2 classification model.
        model_options = language_v1.ClassificationModelOptions(
            v2_model=language_v1.ClassificationModelOptions.V2Model()
        )
        response = client.classify_text(
            request={
                'document': document,
                'classification_model_options': model_options,
            }
        )
        return [
            {'category': cat.name, 'confidence': round(cat.confidence, 4)}
            for cat in response.categories
        ]
    except Exception as e:
        return [{'error': str(e)}]
def extract_words(text):
    """Return the unique words of *text*, lower-cased.

    Words are matched with the ``\\b\\w+\\b`` pattern, so punctuation is
    discarded. Duplicates are removed while keeping first-occurrence
    order: the previous ``list(set(words))`` produced an order that
    varied between interpreter runs (string-hash randomization), which
    made the word-level report order unstable.
    """
    words = re.findall(r'\b\w+\b', text.lower())
    # dict.fromkeys preserves insertion order and drops repeats in O(n).
    return list(dict.fromkeys(words))
def process_paragraph(paragraph):
    """Classify *paragraph* at both word and paragraph granularity.

    Returns a 3-tuple of markdown strings:
    (word-level analysis, paragraph-level analysis, annotated text).
    """
    if not paragraph.strip():
        return "Please enter some text to analyze.", "", ""

    # init_client() returns the client on success and a (None, error)
    # tuple on failure.
    client_result = init_client()
    if isinstance(client_result, tuple):
        _, error = client_result
        return f"Error initializing Google Cloud client: {error}", "", ""
    client = client_result

    # Classify each unique word on its own, skipping single characters.
    # NOTE: this issues one API call per distinct word.
    word_results = {}
    for word in extract_words(paragraph):
        if len(word) <= 1:
            continue
        classifications = classify_text(client, word)
        if classifications and 'error' not in classifications[0]:
            word_results[word] = classifications

    # One more call covering the paragraph as a whole.
    paragraph_classifications = classify_text(client, paragraph)

    # --- word-level markdown report ---
    word_parts = ["## Word-Level Classification Results\n\n"]
    if word_results:
        for word, classifications in word_results.items():
            word_parts.append(f"**{word.upper()}**\n")
            for i, result in enumerate(classifications, 1):
                word_parts.append(f" {i}. Category: `{result['category']}`\n")
                word_parts.append(f" Confidence: {result['confidence']:.4f}\n")
            word_parts.append("\n")
    else:
        word_parts.append("No classifications found for individual words.\n")
    word_analysis = "".join(word_parts)

    # --- paragraph-level markdown report ---
    paragraph_analysis = "## Paragraph-Level Classification Results\n\n"
    if paragraph_classifications and 'error' not in paragraph_classifications[0]:
        for i, result in enumerate(paragraph_classifications, 1):
            paragraph_analysis += f"**{i}. Category:** `{result['category']}`\n"
            paragraph_analysis += f" **Confidence:** {result['confidence']:.4f}\n\n"
    elif paragraph_classifications and 'error' in paragraph_classifications[0]:
        paragraph_analysis += f"Error: {paragraph_classifications[0]['error']}\n"
    else:
        paragraph_analysis += "No classifications found for the paragraph.\n"

    annotated_text = create_annotated_text(paragraph, word_results)
    return word_analysis, paragraph_analysis, annotated_text
def create_annotated_text(original_text, word_results):
    """Rebuild *original_text* with inline classification labels.

    Every whitespace-separated token whose cleaned form (lower-cased,
    non-word characters stripped) has an entry in *word_results* is
    bolded and tagged with its top category's last path segment plus
    the confidence, e.g. ``**Movies** `[Movies: 0.912]```.
    """
    pieces = []
    for token in original_text.split():
        key = re.sub(r'[^\w]', '', token.lower())
        if key in word_results:
            # Annotate with the first (top) classification only.
            top = word_results[key][0]
            label = top['category'].split('/')[-1]
            pieces.append(f"**{token}** `[{label}: {top['confidence']:.3f}]`")
        else:
            pieces.append(token)
    return "## Annotated Text\n\n" + " ".join(pieces)
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for the classifier.

    Returns the un-launched ``gr.Blocks`` app; the caller is expected
    to invoke ``.launch()`` on it.
    """
    with gr.Blocks(title="Google Cloud Natural Language Classifier") as app:
        # Static intro / usage instructions shown above the controls.
        gr.Markdown("""
        # Google Cloud Natural Language Text Classifier (V2 Model)
        This intelligent text classification tool leverages Google Cloud's powerful Natural Language API V2 model to analyze and categorize your content with high accuracy and confidence scores.
        ## How to Use This Tool
        1. **Enter Your Text**: Type or paste any paragraph, article excerpt, or text content into the input box on the left
        2. **Click "Analyze Text"**: Hit the analyze button to process your content
        3. **Review Results**: The tool provides three types of analysis:
        - **Word-Level Classification**: Each unique word gets categorized individually with confidence scores
        - **Annotated Text**: Your original text with inline category labels and confidence scores
        - **Paragraph-Level Classification**: The entire text analyzed as a cohesive unit
        ## What You'll Get
        - **Detailed Categories**: Content is classified into specific categories like `/Arts & Entertainment/Movies`, `/Business & Industrial/Finance`, `/Health/Medical`, etc.
        - **Confidence Scores**: Each classification includes a confidence score (0-1) indicating how certain the AI is about the categorization
        - **Multiple Classifications**: Words and paragraphs can belong to multiple categories simultaneously
        - **Comprehensive Analysis**: Both granular (word-level) and holistic (paragraph-level) insights
        ## Perfect For
        - Content creators organizing their material
        - Researchers analyzing text themes
        - Marketers understanding content categories
        - Students exploring text classification
        - Anyone curious about how AI categorizes written content
        ## Powered By
        This tool uses Google Cloud's Natural Language API V2 model for superior performance and accuracy. The V2 model supports an expanded set of content categories for more precise classification.
        **Source**: [Google Cloud Natural Language API - Content Categories](https://cloud.google.com/natural-language/docs/categories#version_2)
        ---
        **Try the examples below or enter your own text to get started!**
        """)
        # Left column: text input + trigger button. Right column:
        # word-level results markdown.
        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="Enter your paragraph",
                    placeholder="Type or paste your text here...",
                    lines=8,
                    max_lines=20
                )
                analyze_btn = gr.Button("Analyze Text", variant="primary")
            with gr.Column(scale=1):
                word_results = gr.Markdown(
                    label="Word-Level Results",
                    value="Results will appear here after analysis..."
                )
        # Full-width annotated-text panel.
        with gr.Row():
            with gr.Column():
                annotated_output = gr.Markdown(
                    label="Annotated Text",
                    value="Annotated text with classifications will appear here..."
                )
        # Full-width paragraph-level results panel.
        with gr.Row():
            with gr.Column():
                paragraph_results = gr.Markdown(
                    label="Paragraph-Level Results",
                    value="Paragraph classification results will appear here..."
                )
        # Event handlers
        # NOTE: the output order must match the 3-tuple returned by
        # process_paragraph: (word, paragraph, annotated).
        analyze_btn.click(
            fn=process_paragraph,
            inputs=[input_text],
            outputs=[word_results, paragraph_results, annotated_output]
        )
        # Example inputs - designed to show different confidence score ranges
        gr.Examples(
            examples=[
                # High confidence example - clear, focused topic
                ["Apple Inc. reported record quarterly earnings today, with iPhone sales driving revenue growth of 15% year-over-year. The company's stock price surged 8% in after-hours trading as investors celebrated the strong financial performance and optimistic guidance for the upcoming fiscal quarter."],
                # Low confidence example - mixed/ambiguous content
                ["The thing about stuff is that it matters sometimes. People often think about various topics and ideas when they consider different aspects of life and situations that may or may not be relevant to their daily experiences and general thoughts."],
                # Medium confidence example - specific but niche topic
                ["Quantum computing researchers at MIT have developed a new algorithm that could potentially solve certain optimization problems exponentially faster than classical computers, though practical applications remain years away from commercial implementation."]
            ],
            inputs=[input_text]
        )
    return app
# Launch the app
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    create_interface().launch()