Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -360,46 +360,58 @@ def get_contextual_search_result(target_word, tar_passage, groq_client_instance,
|
|
360 |
return "Contextual search requires the LLM API. Please set up your GROQ_API_KEY."
|
361 |
|
362 |
# Basic check if word exists (optional, LLM can handle it too)
|
363 |
-
|
364 |
-
|
|
|
365 |
|
366 |
# Truncate passage if too long for the model/context window
|
367 |
-
|
368 |
-
if
|
369 |
# Simple truncation; could be improved to ensure sentences are complete
|
370 |
-
|
371 |
-
print(f"Warning: Passage truncated for LLM search context to {max_context_length} characters.")
|
|
|
|
|
372 |
|
|
|
373 |
prompt = f"""
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
|
|
|
|
|
|
|
|
382 |
|
383 |
try:
|
384 |
completion = groq_client_instance.chat.completions.create(
|
385 |
model="llama3-8b-8192", # Use the same or a suitable model
|
386 |
messages=[
|
387 |
-
{"role": "system", "content": "You are a helpful assistant skilled at analyzing political texts and extracting relevant information based on a search query."},
|
388 |
{"role": "user", "content": prompt}
|
389 |
],
|
390 |
temperature=0.2, # Low temperature for more factual extraction
|
391 |
max_tokens=1000 # Adjust based on expected output length
|
392 |
)
|
393 |
result = completion.choices[0].message.content.strip()
|
394 |
-
|
|
|
|
|
|
|
395 |
except Exception as e:
|
396 |
error_msg = f"Error during contextual search for '{target_word}': {str(e)}"
|
397 |
print(error_msg)
|
398 |
traceback.print_exc()
|
399 |
# Fallback to concordance if LLM fails?
|
400 |
-
# return
|
401 |
return error_msg # Or return the error message directly
|
402 |
|
|
|
403 |
def analysis(Manifesto, Search):
|
404 |
try:
|
405 |
if Manifesto is None:
|
@@ -412,10 +424,10 @@ def analysis(Manifesto, Search):
|
|
412 |
return raw_party, {}, None, None, None, None, None, "Parsing failed"
|
413 |
text_Party = clean_text(raw_party)
|
414 |
text_Party_processed = Preprocess(text_Party)
|
415 |
-
|
416 |
# --- Perform Search FIRST using the ORIGINAL text for better context ---
|
417 |
# Pass the original raw text for richer context to the LLM
|
418 |
-
searChRes = get_contextual_search_result(Search, raw_party, groq_client)
|
419 |
|
420 |
# --- Then proceed with other analyses ---
|
421 |
summary = generate_summary(raw_party) # Use raw_party for summary for more context?
|
@@ -438,12 +450,10 @@ def analysis(Manifesto, Search):
|
|
438 |
sentiment_plot = safe_plot(lambda: df_dummy['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
|
439 |
subjectivity_plot = safe_plot(lambda: df_dummy['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
|
440 |
freq_plot = fDistancePlot(text_Party_processed)
|
441 |
-
dispersion_plot = DispersionPlot(text_Party_processed)
|
442 |
wordcloud = word_cloud_generator(Manifesto, text_Party_processed) # Pass Manifesto object itself
|
443 |
fdist_Party = fDistance(text_Party_processed)
|
444 |
-
|
445 |
-
# searChRes is now generated earlier
|
446 |
-
|
447 |
return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
|
448 |
|
449 |
except Exception as e:
|
@@ -455,7 +465,6 @@ def analysis(Manifesto, Search):
|
|
455 |
|
456 |
|
457 |
# --- Gradio Interface (remains largely the same, just ensuring output variable names match) ---
|
458 |
-
# Use Blocks for custom layout
|
459 |
with gr.Blocks(title='Manifesto Analysis') as demo:
|
460 |
gr.Markdown("# Manifesto Analysis")
|
461 |
# Input Section
|
@@ -519,6 +528,7 @@ with gr.Blocks(title='Manifesto Analysis') as demo:
|
|
519 |
)
|
520 |
|
521 |
# --- Examples ---
|
|
|
522 |
gr.Examples(
|
523 |
examples=[
|
524 |
["Example/AAP_Manifesto_2019.pdf", "government"],
|
@@ -526,11 +536,11 @@ with gr.Blocks(title='Manifesto Analysis') as demo:
|
|
526 |
["Example/Congress_Manifesto_2019.pdf", "safety"]
|
527 |
],
|
528 |
inputs=[file_input, search_input],
|
529 |
-
|
|
|
530 |
fn=analysis # Run analysis on example click
|
531 |
)
|
532 |
|
533 |
-
# Launch the app
|
534 |
-
if __name__ == "__main__":
|
535 |
-
demo.launch(debug=True, share=False, show_error=True)
|
536 |
|
|
|
|
|
|
360 |
return "Contextual search requires the LLM API. Please set up your GROQ_API_KEY."
|
361 |
|
362 |
# Basic check if word exists (optional, LLM can handle it too)
|
363 |
+
# Simple check, might generate false positives/negatives
|
364 |
+
# if target_word.lower() not in tar_passage.lower():
|
365 |
+
# return f"The term '{target_word}' was not found in the manifesto text."
|
366 |
|
367 |
# Truncate passage if too long for the model/context window
|
368 |
+
original_length = len(tar_passage)
|
369 |
+
if original_length > max_context_length:
|
370 |
# Simple truncation; could be improved to ensure sentences are complete
|
371 |
+
tar_passage_truncated = tar_passage[:max_context_length]
|
372 |
+
print(f"Warning: Passage truncated for LLM search context from {original_length} to {max_context_length} characters.")
|
373 |
+
else:
|
374 |
+
tar_passage_truncated = tar_passage
|
375 |
|
376 |
+
# --- Improved Prompt ---
|
377 |
prompt = f"""
|
378 |
+
You are an expert political analyst. You have been given a section of a political manifesto and a specific search term.
|
379 |
+
Your task is to extract and summarize all information related to the search term from the provided text.
|
380 |
+
Focus on:
|
381 |
+
1. Specific policies, promises, or statements related to the term.
|
382 |
+
2. The context in which the term is used.
|
383 |
+
3. Any key details, figures, or commitments mentioned.
|
384 |
+
Present your findings concisely. If the term is not relevant or not found in the provided text section, state that clearly.
|
385 |
+
Search Term: {target_word}
|
386 |
+
Manifesto Text Section:
|
387 |
+
{tar_passage_truncated}
|
388 |
+
Relevant Information:
|
389 |
+
"""
|
390 |
|
391 |
try:
|
392 |
completion = groq_client_instance.chat.completions.create(
|
393 |
model="llama3-8b-8192", # Use the same or a suitable model
|
394 |
messages=[
|
395 |
+
{"role": "system", "content": "You are a helpful assistant skilled at analyzing political texts and extracting relevant information based on a search query. Provide clear, concise summaries."},
|
396 |
{"role": "user", "content": prompt}
|
397 |
],
|
398 |
temperature=0.2, # Low temperature for more factual extraction
|
399 |
max_tokens=1000 # Adjust based on expected output length
|
400 |
)
|
401 |
result = completion.choices[0].message.content.strip()
|
402 |
+
# Add a note if the input was truncated
|
403 |
+
if original_length > max_context_length:
|
404 |
+
result = f"(Note: Analysis based on the first {max_context_length} characters of the manifesto.)\n\n" + result
|
405 |
+
return result if result else f"No specific context for '{target_word}' could be generated from the provided text section."
|
406 |
except Exception as e:
|
407 |
error_msg = f"Error during contextual search for '{target_word}': {str(e)}"
|
408 |
print(error_msg)
|
409 |
traceback.print_exc()
|
410 |
# Fallback to concordance if LLM fails?
|
411 |
+
# return get_all_phases_containing_tar_wrd(target_word, tar_passage)
|
412 |
return error_msg # Or return the error message directly
|
413 |
|
414 |
+
|
415 |
def analysis(Manifesto, Search):
|
416 |
try:
|
417 |
if Manifesto is None:
|
|
|
424 |
return raw_party, {}, None, None, None, None, None, "Parsing failed"
|
425 |
text_Party = clean_text(raw_party)
|
426 |
text_Party_processed = Preprocess(text_Party)
|
427 |
+
|
428 |
# --- Perform Search FIRST using the ORIGINAL text for better context ---
|
429 |
# Pass the original raw text for richer context to the LLM
|
430 |
+
searChRes = get_contextual_search_result(Search, raw_party, groq_client)
|
431 |
|
432 |
# --- Then proceed with other analyses ---
|
433 |
summary = generate_summary(raw_party) # Use raw_party for summary for more context?
|
|
|
450 |
sentiment_plot = safe_plot(lambda: df_dummy['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
|
451 |
subjectivity_plot = safe_plot(lambda: df_dummy['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
|
452 |
freq_plot = fDistancePlot(text_Party_processed)
|
453 |
+
dispersion_plot = DispersionPlot(text_Party_processed) # Use fixed version
|
454 |
wordcloud = word_cloud_generator(Manifesto, text_Party_processed) # Pass Manifesto object itself
|
455 |
fdist_Party = fDistance(text_Party_processed)
|
456 |
+
|
|
|
|
|
457 |
return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
|
458 |
|
459 |
except Exception as e:
|
|
|
465 |
|
466 |
|
467 |
# --- Gradio Interface (remains largely the same, just ensuring output variable names match) ---
|
|
|
468 |
with gr.Blocks(title='Manifesto Analysis') as demo:
|
469 |
gr.Markdown("# Manifesto Analysis")
|
470 |
# Input Section
|
|
|
528 |
)
|
529 |
|
530 |
# --- Examples ---
|
531 |
+
# Ensure outputs list references the PREDEFINED components from the layout
|
532 |
gr.Examples(
|
533 |
examples=[
|
534 |
["Example/AAP_Manifesto_2019.pdf", "government"],
|
|
|
536 |
["Example/Congress_Manifesto_2019.pdf", "safety"]
|
537 |
],
|
538 |
inputs=[file_input, search_input],
|
539 |
+
# --- Key Fix: Reference the predefined output components ---
|
540 |
+
outputs=[search_output, topics_output, sentiment_output, subjectivity_output, wordcloud_output, freq_output, dispersion_output, summary_output],
|
541 |
fn=analysis # Run analysis on example click
|
542 |
)
|
543 |
|
|
|
|
|
|
|
544 |
|
545 |
+
if __name__ == "__main__":
|
546 |
+
demo.launch(debug=True, share=False, show_error=True)
|