entropy25 committed on
Commit
d7cd2ec
·
verified ·
1 Parent(s): 86af8b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -13
app.py CHANGED
@@ -96,9 +96,16 @@ class ModelManager:
96
 
97
  @staticmethod
98
  def detect_language(text: str) -> str:
99
- """Detect text language"""
100
  try:
 
101
  detected = langdetect.detect(text)
 
 
 
 
 
 
102
  return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
103
  except:
104
  return 'en'
@@ -162,10 +169,18 @@ class TextProcessor:
162
  @staticmethod
163
  def extract_keywords(text: str, top_k: int = 5) -> List[str]:
164
  """Extract key words from text"""
165
- cleaned = TextProcessor.clean_text(text)
166
- words = cleaned.split()
167
- word_freq = Counter(words)
168
- return [word for word, _ in word_freq.most_common(top_k)]
 
 
 
 
 
 
 
 
169
 
170
  class SentimentAnalyzer:
171
  """Enhanced sentiment analysis"""
@@ -185,10 +200,10 @@ class SentimentAnalyzer:
185
  # Get appropriate model
186
  model, tokenizer = model_manager.get_model(detected_lang)
187
 
188
- # Preprocessing options
189
  options = preprocessing_options or {}
190
  processed_text = text
191
- if options.get('clean_text', False):
192
  processed_text = TextProcessor.clean_text(
193
  text,
194
  options.get('remove_punctuation', True),
@@ -388,7 +403,7 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
388
  """Enhanced single text analysis"""
389
  try:
390
  if not text.strip():
391
- return "Please enter text", None, None, "No analysis performed"
392
 
393
  # Map display names back to language codes
394
  language_map = {
@@ -437,11 +452,11 @@ def analyze_single_text(text: str, language: str, theme: str, clean_text: bool,
437
  - **Stats:** {result['word_count']} words, {result['char_count']} characters
438
  """
439
 
440
- return info_text, gauge_fig, bars_fig, "Analysis completed successfully"
441
 
442
  except Exception as e:
443
  logger.error(f"Analysis failed: {e}")
444
- return f"Error: {str(e)}", None, None, "Analysis failed"
445
 
446
  def get_history_stats():
447
  """Get history statistics"""
@@ -555,8 +570,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Sentiment Analyzer") as d
555
  with gr.Row():
556
  gauge_plot = gr.Plot(label="Sentiment Gauge")
557
  bars_plot = gr.Plot(label="Probability Distribution")
558
-
559
- status_output = gr.Textbox(label="Status", interactive=False)
560
 
561
  with gr.Tab("📊 History & Analytics"):
562
  with gr.Row():
@@ -576,7 +589,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Advanced Sentiment Analyzer") as d
576
  analyze_btn.click(
577
  analyze_single_text,
578
  inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
579
- outputs=[result_info, gauge_plot, bars_plot, status_output]
580
  )
581
 
582
  stats_btn.click(
 
96
 
97
  @staticmethod
98
  def detect_language(text: str) -> str:
99
+ """Detect text language properly"""
100
  try:
101
+ # Use langdetect for all languages
102
  detected = langdetect.detect(text)
103
+ # Map some common langdetect codes to our supported languages
104
+ language_mapping = {
105
+ 'zh-cn': 'zh',
106
+ 'zh-tw': 'zh'
107
+ }
108
+ detected = language_mapping.get(detected, detected)
109
  return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
110
  except:
111
  return 'en'
 
169
  @staticmethod
170
  def extract_keywords(text: str, top_k: int = 5) -> List[str]:
171
  """Extract key words from text"""
172
+ # For Chinese text, extract characters
173
+ if re.search(r'[\u4e00-\u9fff]', text):
174
+ words = re.findall(r'[\u4e00-\u9fff]+', text)
175
+ all_chars = ''.join(words)
176
+ char_freq = Counter(all_chars)
177
+ return [char for char, _ in char_freq.most_common(top_k)]
178
+ else:
179
+ # For other languages, use word-based extraction
180
+ cleaned = TextProcessor.clean_text(text)
181
+ words = cleaned.split()
182
+ word_freq = Counter(words)
183
+ return [word for word, _ in word_freq.most_common(top_k)]
184
 
185
  class SentimentAnalyzer:
186
  """Enhanced sentiment analysis"""
 
200
  # Get appropriate model
201
  model, tokenizer = model_manager.get_model(detected_lang)
202
 
203
+ # Preprocessing options - don't clean Chinese text
204
  options = preprocessing_options or {}
205
  processed_text = text
206
+ if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
207
  processed_text = TextProcessor.clean_text(
208
  text,
209
  options.get('remove_punctuation', True),
 
403
  """Enhanced single text analysis"""
404
  try:
405
  if not text.strip():
406
+ return "Please enter text", None, None
407
 
408
  # Map display names back to language codes
409
  language_map = {
 
452
  - **Stats:** {result['word_count']} words, {result['char_count']} characters
453
  """
454
 
455
+ return info_text, gauge_fig, bars_fig
456
 
457
  except Exception as e:
458
  logger.error(f"Analysis failed: {e}")
459
+ return f"Error: {str(e)}", None, None
460
 
461
  def get_history_stats():
462
  """Get history statistics"""
 
570
  with gr.Row():
571
  gauge_plot = gr.Plot(label="Sentiment Gauge")
572
  bars_plot = gr.Plot(label="Probability Distribution")
 
 
573
 
574
  with gr.Tab("📊 History & Analytics"):
575
  with gr.Row():
 
589
  analyze_btn.click(
590
  analyze_single_text,
591
  inputs=[text_input, language_select, theme_select, clean_text, remove_punct, remove_nums],
592
+ outputs=[result_info, gauge_plot, bars_plot]
593
  )
594
 
595
  stats_btn.click(