Sa-m committed on
Commit
018141a
·
verified ·
1 Parent(s): 200dcdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -28
app.py CHANGED
@@ -360,46 +360,58 @@ def get_contextual_search_result(target_word, tar_passage, groq_client_instance,
360
  return "Contextual search requires the LLM API. Please set up your GROQ_API_KEY."
361
 
362
  # Basic check if word exists (optional, LLM can handle it too)
363
- if target_word.lower() not in tar_passage.lower():
364
- return f"The term '{target_word}' was not found in the manifesto text."
 
365
 
366
  # Truncate passage if too long for the model/context window
367
- # You might need to adjust this based on your model's limits and desired performance
368
- if len(tar_passage) > max_context_length:
369
  # Simple truncation; could be improved to ensure sentences are complete
370
- tar_passage = tar_passage[:max_context_length]
371
- print(f"Warning: Passage truncated for LLM search context to {max_context_length} characters.")
 
 
372
 
 
373
  prompt = f"""
374
- You are given a political manifesto text and a specific search term.
375
- Your task is to find all relevant mentions of the search term in the text and provide a concise, informative summary of the context surrounding each mention.
376
- Focus on the key ideas, policies, or points related to the search term.
377
- If the term is not found or not relevant, state that clearly.
378
- Search Term: {target_word}
379
- Manifesto Text:
380
- {tar_passage}
381
- """
 
 
 
 
382
 
383
  try:
384
  completion = groq_client_instance.chat.completions.create(
385
  model="llama3-8b-8192", # Use the same or a suitable model
386
  messages=[
387
- {"role": "system", "content": "You are a helpful assistant skilled at analyzing political texts and extracting relevant information based on a search query."},
388
  {"role": "user", "content": prompt}
389
  ],
390
  temperature=0.2, # Low temperature for more factual extraction
391
  max_tokens=1000 # Adjust based on expected output length
392
  )
393
  result = completion.choices[0].message.content.strip()
394
- return result if result else f"No specific context for '{target_word}' could be generated."
 
 
 
395
  except Exception as e:
396
  error_msg = f"Error during contextual search for '{target_word}': {str(e)}"
397
  print(error_msg)
398
  traceback.print_exc()
399
  # Fallback to concordance if LLM fails?
400
- # return get_all_phases_containing_tar_wrd_fallback(target_word, tar_passage)
401
  return error_msg # Or return the error message directly
402
 
 
403
  def analysis(Manifesto, Search):
404
  try:
405
  if Manifesto is None:
@@ -412,10 +424,10 @@ def analysis(Manifesto, Search):
412
  return raw_party, {}, None, None, None, None, None, "Parsing failed"
413
  text_Party = clean_text(raw_party)
414
  text_Party_processed = Preprocess(text_Party)
415
-
416
  # --- Perform Search FIRST using the ORIGINAL text for better context ---
417
  # Pass the original raw text for richer context to the LLM
418
- searChRes = get_contextual_search_result(Search, raw_party, groq_client)
419
 
420
  # --- Then proceed with other analyses ---
421
  summary = generate_summary(raw_party) # Use raw_party for summary for more context?
@@ -438,12 +450,10 @@ def analysis(Manifesto, Search):
438
  sentiment_plot = safe_plot(lambda: df_dummy['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
439
  subjectivity_plot = safe_plot(lambda: df_dummy['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
440
  freq_plot = fDistancePlot(text_Party_processed)
441
- dispersion_plot = DispersionPlot(text_Party_processed)
442
  wordcloud = word_cloud_generator(Manifesto, text_Party_processed) # Pass Manifesto object itself
443
  fdist_Party = fDistance(text_Party_processed)
444
-
445
- # searChRes is now generated earlier
446
-
447
  return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
448
 
449
  except Exception as e:
@@ -455,7 +465,6 @@ def analysis(Manifesto, Search):
455
 
456
 
457
  # --- Gradio Interface (remains largely the same, just ensuring output variable names match) ---
458
- # Use Blocks for custom layout
459
  with gr.Blocks(title='Manifesto Analysis') as demo:
460
  gr.Markdown("# Manifesto Analysis")
461
  # Input Section
@@ -519,6 +528,7 @@ with gr.Blocks(title='Manifesto Analysis') as demo:
519
  )
520
 
521
  # --- Examples ---
 
522
  gr.Examples(
523
  examples=[
524
  ["Example/AAP_Manifesto_2019.pdf", "government"],
@@ -526,11 +536,11 @@ with gr.Blocks(title='Manifesto Analysis') as demo:
526
  ["Example/Congress_Manifesto_2019.pdf", "safety"]
527
  ],
528
  inputs=[file_input, search_input],
529
- outputs=[search_output, topics_output, sentiment_output, subjectivity_output, wordcloud_output, freq_output, dispersion_output, summary_output], # Link examples to outputs
 
530
  fn=analysis # Run analysis on example click
531
  )
532
 
533
- # Launch the app
534
- if __name__ == "__main__":
535
- demo.launch(debug=True, share=False, show_error=True)
536
 
 
 
 
360
  return "Contextual search requires the LLM API. Please set up your GROQ_API_KEY."
361
 
362
  # Basic check if word exists (optional, LLM can handle it too)
363
+ # Simple check, might generate false positives/negatives
364
+ # if target_word.lower() not in tar_passage.lower():
365
+ # return f"The term '{target_word}' was not found in the manifesto text."
366
 
367
  # Truncate passage if too long for the model/context window
368
+ original_length = len(tar_passage)
369
+ if original_length > max_context_length:
370
  # Simple truncation; could be improved to ensure sentences are complete
371
+ tar_passage_truncated = tar_passage[:max_context_length]
372
+ print(f"Warning: Passage truncated for LLM search context from {original_length} to {max_context_length} characters.")
373
+ else:
374
+ tar_passage_truncated = tar_passage
375
 
376
+ # --- Improved Prompt ---
377
  prompt = f"""
378
+ You are an expert political analyst. You have been given a section of a political manifesto and a specific search term.
379
+ Your task is to extract and summarize all information related to the search term from the provided text.
380
+ Focus on:
381
+ 1. Specific policies, promises, or statements related to the term.
382
+ 2. The context in which the term is used.
383
+ 3. Any key details, figures, or commitments mentioned.
384
+ Present your findings concisely. If the term is not relevant or not found in the provided text section, state that clearly.
385
+ Search Term: {target_word}
386
+ Manifesto Text Section:
387
+ {tar_passage_truncated}
388
+ Relevant Information:
389
+ """
390
 
391
  try:
392
  completion = groq_client_instance.chat.completions.create(
393
  model="llama3-8b-8192", # Use the same or a suitable model
394
  messages=[
395
+ {"role": "system", "content": "You are a helpful assistant skilled at analyzing political texts and extracting relevant information based on a search query. Provide clear, concise summaries."},
396
  {"role": "user", "content": prompt}
397
  ],
398
  temperature=0.2, # Low temperature for more factual extraction
399
  max_tokens=1000 # Adjust based on expected output length
400
  )
401
  result = completion.choices[0].message.content.strip()
402
+ # Add a note if the input was truncated
403
+ if original_length > max_context_length:
404
+ result = f"(Note: Analysis based on the first {max_context_length} characters of the manifesto.)\n\n" + result
405
+ return result if result else f"No specific context for '{target_word}' could be generated from the provided text section."
406
  except Exception as e:
407
  error_msg = f"Error during contextual search for '{target_word}': {str(e)}"
408
  print(error_msg)
409
  traceback.print_exc()
410
  # Fallback to concordance if LLM fails?
411
+ # return get_all_phases_containing_tar_wrd(target_word, tar_passage)
412
  return error_msg # Or return the error message directly
413
 
414
+
415
  def analysis(Manifesto, Search):
416
  try:
417
  if Manifesto is None:
 
424
  return raw_party, {}, None, None, None, None, None, "Parsing failed"
425
  text_Party = clean_text(raw_party)
426
  text_Party_processed = Preprocess(text_Party)
427
+
428
  # --- Perform Search FIRST using the ORIGINAL text for better context ---
429
  # Pass the original raw text for richer context to the LLM
430
+ searChRes = get_contextual_search_result(Search, raw_party, groq_client)
431
 
432
  # --- Then proceed with other analyses ---
433
  summary = generate_summary(raw_party) # Use raw_party for summary for more context?
 
450
  sentiment_plot = safe_plot(lambda: df_dummy['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
451
  subjectivity_plot = safe_plot(lambda: df_dummy['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
452
  freq_plot = fDistancePlot(text_Party_processed)
453
+ dispersion_plot = DispersionPlot(text_Party_processed) # Use fixed version
454
  wordcloud = word_cloud_generator(Manifesto, text_Party_processed) # Pass Manifesto object itself
455
  fdist_Party = fDistance(text_Party_processed)
456
+
 
 
457
  return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
458
 
459
  except Exception as e:
 
465
 
466
 
467
  # --- Gradio Interface (remains largely the same, just ensuring output variable names match) ---
 
468
  with gr.Blocks(title='Manifesto Analysis') as demo:
469
  gr.Markdown("# Manifesto Analysis")
470
  # Input Section
 
528
  )
529
 
530
  # --- Examples ---
531
+ # Ensure outputs list references the PREDEFINED components from the layout
532
  gr.Examples(
533
  examples=[
534
  ["Example/AAP_Manifesto_2019.pdf", "government"],
 
536
  ["Example/Congress_Manifesto_2019.pdf", "safety"]
537
  ],
538
  inputs=[file_input, search_input],
539
+ # --- Key Fix: Reference the predefined output components ---
540
+ outputs=[search_output, topics_output, sentiment_output, subjectivity_output, wordcloud_output, freq_output, dispersion_output, summary_output],
541
  fn=analysis # Run analysis on example click
542
  )
543
 
 
 
 
544
 
545
+ if __name__ == "__main__":
546
+ demo.launch(debug=True, share=False, show_error=True)