|
import gradio as gr |
|
import json |
|
import re |
|
from google.cloud import language_v1 |
|
from google.oauth2 import service_account |
|
import os |
|
import pandas as pd |
|
|
|
|
|
def init_client():
    """Build a Google Cloud Natural Language client.

    When the ``GOOGLE_APPLICATION_CREDENTIALS_JSON`` environment variable is
    set, its value is parsed as JSON and used as service-account info for the
    client. Otherwise the client is constructed without explicit credentials.

    Returns:
        The ``LanguageServiceClient`` on success. On any failure, a
        ``(None, error_message)`` tuple instead, so callers must check for a
        tuple before using the result.
    """
    try:
        raw_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')
        if raw_credentials is None:
            return language_v1.LanguageServiceClient()

        key_info = json.loads(raw_credentials)
        creds = service_account.Credentials.from_service_account_info(key_info)
        return language_v1.LanguageServiceClient(credentials=creds)
    except Exception as exc:
        # Failures are reported as a value, not raised.
        return None, str(exc)
|
|
|
def classify_text(client, text_content):
    """Classify *text_content* with the Natural Language API's V2 model.

    Args:
        client: Object exposing ``classify_text(request=...)``, as returned
            by ``init_client``.
        text_content: Plain text to classify.

    Returns:
        A list of ``{'category': name, 'confidence': value}`` dicts, one per
        category in the response, with confidence rounded to 4 decimals.
        If anything raises, a single-element list ``[{'error': message}]``.
    """
    try:
        request = {
            'document': language_v1.Document(
                content=text_content,
                type_=language_v1.Document.Type.PLAIN_TEXT,
            ),
            # Explicitly select the V2 classification model.
            'classification_model_options': language_v1.ClassificationModelOptions(
                v2_model=language_v1.ClassificationModelOptions.V2Model()
            ),
        }
        response = client.classify_text(request=request)
        return [
            {'category': entry.name, 'confidence': round(entry.confidence, 4)}
            for entry in response.categories
        ]
    except Exception as exc:
        # Errors are returned in-band so callers can render them.
        return [{'error': str(exc)}]
|
|
|
def extract_words(text):
    """Return the unique lowercase words found in *text*.

    Words are maximal runs of ``\\w`` characters; punctuation and whitespace
    are discarded (so ``"year-over-year"`` yields ``"year"`` and ``"over"``).

    Args:
        text: The string to tokenize.

    Returns:
        A list of distinct lowercase words in order of first appearance.
    """
    words = re.findall(r'\b\w+\b', text.lower())
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # result is deterministic (a set would give an arbitrary order that can
    # differ between runs).
    return list(dict.fromkeys(words))
|
|
|
def process_paragraph(paragraph):
    """Classify a paragraph both word-by-word and as a whole.

    Args:
        paragraph: The text to analyze. ``None``, empty, or whitespace-only
            input is rejected with a prompt message.

    Returns:
        A 3-tuple of Markdown strings
        ``(word_analysis, paragraph_analysis, annotated_text)``. When the
        input is blank or the client cannot be initialized, the first element
        holds the message and the other two are empty strings.
    """
    # Guard against None as well as blank text so a missing value does not
    # raise AttributeError on .strip().
    if not paragraph or not paragraph.strip():
        return "Please enter some text to analyze.", "", ""

    # init_client signals failure by returning a (None, error) tuple.
    client_result = init_client()
    if isinstance(client_result, tuple):
        _, error = client_result
        return f"Error initializing Google Cloud client: {error}", "", ""
    client = client_result

    # Each unique word longer than one character costs one classification
    # request; words whose request fails or yields nothing are skipped.
    word_results = {}
    for word in extract_words(paragraph):
        if len(word) > 1:
            classifications = classify_text(client, word)
            if classifications and 'error' not in classifications[0]:
                word_results[word] = classifications

    paragraph_classifications = classify_text(client, paragraph)

    # Collect fragments and join once instead of repeated string +=.
    word_parts = ["## Word-Level Classification Results\n\n"]
    if word_results:
        for word, classifications in word_results.items():
            word_parts.append(f"**{word.upper()}**\n")
            for i, result in enumerate(classifications, 1):
                word_parts.append(f" {i}. Category: `{result['category']}`\n")
                word_parts.append(f" Confidence: {result['confidence']:.4f}\n")
            word_parts.append("\n")
    else:
        word_parts.append("No classifications found for individual words.\n")
    word_analysis = "".join(word_parts)

    paragraph_parts = ["## Paragraph-Level Classification Results\n\n"]
    if not paragraph_classifications:
        paragraph_parts.append("No classifications found for the paragraph.\n")
    elif 'error' in paragraph_classifications[0]:
        paragraph_parts.append(f"Error: {paragraph_classifications[0]['error']}\n")
    else:
        for i, result in enumerate(paragraph_classifications, 1):
            paragraph_parts.append(f"**{i}. Category:** `{result['category']}`\n")
            paragraph_parts.append(f" **Confidence:** {result['confidence']:.4f}\n\n")
    paragraph_analysis = "".join(paragraph_parts)

    annotated_text = create_annotated_text(paragraph, word_results)

    return word_analysis, paragraph_analysis, annotated_text
|
|
|
def create_annotated_text(original_text, word_results):
    """Render *original_text* as Markdown with inline classification tags.

    The text is split on whitespace. Each token is lowercased and stripped of
    non-word characters to form a lookup key; if that key is in
    *word_results*, the token is bolded and followed by the last path segment
    of its first category plus the confidence to three decimals.

    Args:
        original_text: The text to annotate.
        word_results: Mapping of lowercase word to a list of
            ``{'category': ..., 'confidence': ...}`` dicts.

    Returns:
        A Markdown string starting with an "Annotated Text" heading, followed
        by the tokens joined with single spaces.
    """

    def render(token):
        key = re.sub(r'[^\w]', '', token.lower())
        if key not in word_results:
            return token
        # Only the first classification for the word is shown.
        best = word_results[key][0]
        category_short = best['category'].split('/')[-1]
        confidence = best['confidence']
        return f"**{token}** `[{category_short}: {confidence:.3f}]`"

    rendered_tokens = [render(token) for token in original_text.split()]
    return "## Annotated Text\n\n" + " ".join(rendered_tokens)
|
|
|
|
|
def create_interface():
    """Assemble the Gradio Blocks UI for the classifier.

    The layout is an introductory Markdown panel, a row with the input box
    and analyze button beside the word-level results, a row for the annotated
    text, a row for the paragraph-level results, and a set of example inputs.
    The button is wired to ``process_paragraph``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    intro_markdown = """
        # Google Cloud Natural Language Text Classifier (V2 Model)

        This intelligent text classification tool leverages Google Cloud's powerful Natural Language API V2 model to analyze and categorize your content with high accuracy and confidence scores.

        ## How to Use This Tool

        1. **Enter Your Text**: Type or paste any paragraph, article excerpt, or text content into the input box on the left
        2. **Click "Analyze Text"**: Hit the analyze button to process your content
        3. **Review Results**: The tool provides three types of analysis:
        - **Word-Level Classification**: Each unique word gets categorized individually with confidence scores
        - **Annotated Text**: Your original text with inline category labels and confidence scores
        - **Paragraph-Level Classification**: The entire text analyzed as a cohesive unit

        ## What You'll Get

        - **Detailed Categories**: Content is classified into specific categories like `/Arts & Entertainment/Movies`, `/Business & Industrial/Finance`, `/Health/Medical`, etc.
        - **Confidence Scores**: Each classification includes a confidence score (0-1) indicating how certain the AI is about the categorization
        - **Multiple Classifications**: Words and paragraphs can belong to multiple categories simultaneously
        - **Comprehensive Analysis**: Both granular (word-level) and holistic (paragraph-level) insights

        ## Perfect For

        - Content creators organizing their material
        - Researchers analyzing text themes
        - Marketers understanding content categories
        - Students exploring text classification
        - Anyone curious about how AI categorizes written content

        ## Powered By

        This tool uses Google Cloud's Natural Language API V2 model for superior performance and accuracy. The V2 model supports an expanded set of content categories for more precise classification.

        **Source**: [Google Cloud Natural Language API - Content Categories](https://cloud.google.com/natural-language/docs/categories#version_2)

        ---

        **Try the examples below or enter your own text to get started!**
        """

    # Each example is a one-element list because there is a single input.
    example_paragraphs = [
        ["Apple Inc. reported record quarterly earnings today, with iPhone sales driving revenue growth of 15% year-over-year. The company's stock price surged 8% in after-hours trading as investors celebrated the strong financial performance and optimistic guidance for the upcoming fiscal quarter."],
        ["The thing about stuff is that it matters sometimes. People often think about various topics and ideas when they consider different aspects of life and situations that may or may not be relevant to their daily experiences and general thoughts."],
        ["Quantum computing researchers at MIT have developed a new algorithm that could potentially solve certain optimization problems exponentially faster than classical computers, though practical applications remain years away from commercial implementation."],
    ]

    with gr.Blocks(title="Google Cloud Natural Language Classifier") as app:
        gr.Markdown(intro_markdown)

        with gr.Row():
            with gr.Column(scale=1):
                text_input = gr.Textbox(
                    label="Enter your paragraph",
                    placeholder="Type or paste your text here...",
                    lines=8,
                    max_lines=20,
                )
                run_button = gr.Button("Analyze Text", variant="primary")

            with gr.Column(scale=1):
                word_panel = gr.Markdown(
                    label="Word-Level Results",
                    value="Results will appear here after analysis...",
                )

        with gr.Row(), gr.Column():
            annotated_panel = gr.Markdown(
                label="Annotated Text",
                value="Annotated text with classifications will appear here...",
            )

        with gr.Row(), gr.Column():
            paragraph_panel = gr.Markdown(
                label="Paragraph-Level Results",
                value="Paragraph classification results will appear here...",
            )

        # Output order matches process_paragraph's return tuple:
        # (word analysis, paragraph analysis, annotated text).
        run_button.click(
            fn=process_paragraph,
            inputs=[text_input],
            outputs=[word_panel, paragraph_panel, annotated_panel],
        )

        gr.Examples(examples=example_paragraphs, inputs=[text_input])

    return app
|
|
|
|
|
if __name__ == "__main__":
    # Build and launch the UI only when run as a script, not on import.
    create_interface().launch()