Spaces:

entropy25
/

multilingual-sentiment-analyzer

Sleeping

App Files Community

entropy25 committed 23 days ago

Commit

d7cd2ec

verified ·

1 Parent(s): 86af8b6

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -13

app.py CHANGED Viewed

@@ -96,9 +96,16 @@ class ModelManager:
     @staticmethod
     def detect_language(text: str) -> str:
-        """Detect text language"""
         try:
             detected = langdetect.detect(text)
             return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
         except:
             return 'en'
@@ -162,10 +169,18 @@ class TextProcessor:
     @staticmethod
     def extract_keywords(text: str, top_k: int = 5) -> List[str]:
         """Extract key words from text"""
-        cleaned = TextProcessor.clean_text(text)
-        words = cleaned.split()
-        word_freq = Counter(words)
-        return [word for word, _ in word_freq.most_common(top_k)]
 class SentimentAnalyzer:
     """Enhanced sentiment analysis"""
@@ -185,10 +200,10 @@ class SentimentAnalyzer:
         # Get appropriate model
         model, tokenizer = model_manager.get_model(detected_lang)
-        # Preprocessing options
         options = preprocessing_options or {}
         processed_text = text
-        if options.get('clean_text', False):
             processed_text = TextProcessor.clean_text(
                 text,
                 options.get('remove_punctuation', True),
@@ -388,7 +403,7 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
     """Enhanced single text analysis"""
     try:
         if not text.strip():
-            return "Please enter text", None, None, "No analysis performed"
         # Map display names back to language codes
         language_map = {
@@ -437,11 +452,11 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
 - **Stats:** {result['word_count']} words, {result['char_count']} characters
         """
-        return info_text, gauge_fig, bars_fig, "Analysis completed successfully"
     except Exception as e:
         logger.error(f"Analysis failed: {e}")
-        return f"Error: {str(e)}", None, None, "Analysis failed"
 def get_history_stats():
     """Get history statistics"""
@@ -555,8 +570,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Sentiment Analyzer") as d
         with gr.Row():
             gauge_plot = gr.Plot(label="Sentiment Gauge")
             bars_plot = gr.Plot(label="Probability Distribution")
-        status_output = gr.Textbox(label="Status", interactive=False)
     with gr.Tab("📊 History & Analytics"):
         with gr.Row():
@@ -576,7 +589,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Sentiment Analyzer") as d
     analyze_btn.click(
         analyze_single_text,
         inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
-        outputs=[result_info, gauge_plot, bars_plot, status_output]
     )
     stats_btn.click(

     @staticmethod
     def detect_language(text: str) -> str:
+        """Detect text language properly"""
         try:
+            # Use langdetect for all languages
             detected = langdetect.detect(text)
+            # Map some common langdetect codes to our supported languages
+            language_mapping = {
+                'zh-cn': 'zh',
+                'zh-tw': 'zh'
+            }
+            detected = language_mapping.get(detected, detected)
             return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
         except:
             return 'en'
     @staticmethod
     def extract_keywords(text: str, top_k: int = 5) -> List[str]:
         """Extract key words from text"""
+        # For Chinese text, extract characters
+        if re.search(r'[\u4e00-\u9fff]', text):
+            words = re.findall(r'[\u4e00-\u9fff]+', text)
+            all_chars = ''.join(words)
+            char_freq = Counter(all_chars)
+            return [char for char, _ in char_freq.most_common(top_k)]
+        else:
+            # For other languages, use word-based extraction
+            cleaned = TextProcessor.clean_text(text)
+            words = cleaned.split()
+            word_freq = Counter(words)
+            return [word for word, _ in word_freq.most_common(top_k)]
 class SentimentAnalyzer:
     """Enhanced sentiment analysis"""
         # Get appropriate model
         model, tokenizer = model_manager.get_model(detected_lang)
+        # Preprocessing options - don't clean Chinese text
         options = preprocessing_options or {}
         processed_text = text
+        if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
             processed_text = TextProcessor.clean_text(
                 text,
                 options.get('remove_punctuation', True),
     """Enhanced single text analysis"""
     try:
         if not text.strip():
+            return "Please enter text", None, None
         # Map display names back to language codes
         language_map = {
 - **Stats:** {result['word_count']} words, {result['char_count']} characters
         """
+        return info_text, gauge_fig, bars_fig
     except Exception as e:
         logger.error(f"Analysis failed: {e}")
+        return f"Error: {str(e)}", None, None
 def get_history_stats():
     """Get history statistics"""
         with gr.Row():
             gauge_plot = gr.Plot(label="Sentiment Gauge")
             bars_plot = gr.Plot(label="Probability Distribution")
     with gr.Tab("📊 History & Analytics"):
         with gr.Row():
     analyze_btn.click(
         analyze_single_text,
         inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
+        outputs=[result_info, gauge_plot, bars_plot]
     )
     stats_btn.click(