Update app.py
app.py
CHANGED
@@ -28,21 +28,21 @@ import unidecode
-
-# Download NLTK resources
-nltk.download(['stopwords', 'wordnet', 'words'])
-nltk.download('punkt')
-nltk.download('punkt_tab')
-stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This','also', 'A', 'fu','To','5','ing', 'er', '2')
@@ -51,8 +51,10 @@ def Parsing(parsed_text):
-        raw_party = textract.process(file_path, encoding='ascii', method='pdfminer')
-        return clean(raw_party)
@@ -83,10 +85,10 @@ def generate_summary(text):
-            model="llama3-8b-8192",
-                {"role": "user", "content": f"Please summarize the following political manifesto text in about 300-500 words, focusing on the main policy areas, promises, and themes:\n\n{text}"}
@@ -99,25 +101,37 @@ def fDistance(text2Party):
-
-    tfidf_matrix = vectorizer.fit_transform(sent_tokenize(text2Party))
-    feature_names = vectorizer.get_feature_names_out()
-
-    tfidf_scores = {}
-    for i, word in enumerate(feature_names):
-        scores = [tfidf_matrix[j, i] for j in range(len(sent_tokenize(text2Party))) if i < tfidf_matrix[j].shape[1]]
-        if scores:
-            tfidf_scores[word] = sum(scores) / len(scores)
-
-    combined_scores = {}
-    for word in set(list(mem.keys()) + list(tfidf_scores.keys())):
-        freq_score = mem.get(word, 0) / max(mem.values()) if mem else 0
-        tfidf_score = tfidf_scores.get(word, 0) / max(tfidf_scores.values()) if tfidf_scores else 0
-        combined_scores[word] = (freq_score * 0.3) + (tfidf_score * 0.7)
-
-    top_words = dict(sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:10])
-    return normalize(top_words)
@@ -130,64 +144,105 @@ def safe_plot(func, *args, **kwargs):
-        plt.savefig(buf, format='png')
-        return Image.open(buf)
-        print(f"Plotting error: {e}")
-        return None
-    return safe_plot(lambda: FreqDist(word_tokenize(text2Party)).plot(15, title='Frequency Distribution'))
-        moby = Text(word_tokens_party)  # Ensure Text is imported
-        word_Lst = [fdistance.most_common(6)[x][0] for x in range(5)]
-        plt.figure(figsize=(4, 3))
-        plt.savefig(buf, format='png')
-        plt.clf()
-        parsed = parsed_text_name.lower()
-        if 'bjp' in parsed:
-        elif 'congress' in parsed:
-        elif 'aap' in parsed:
-        else:
-            mask_path = None
-            wordcloud = WordCloud(max_words=3000, mask=mask).generate(text_Party)
-            plt.imshow(wordcloud)
-            wordcloud = WordCloud(max_words=2000).generate(text_Party)
-            plt.imshow(wordcloud)
-        plt.savefig(buf, format='png')
-        return Image.open(buf)
-        return None
@@ -195,19 +250,18 @@ def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10, numLins=4):
-
-
-
-    return '\n'.join(result)
@@ -216,27 +270,35 @@ def analysis(Manifesto, Search):
-
-        summary = generate_summary(raw_party)
-        df = pd.DataFrame([{'Content': text_Party_processed}], columns=['Content'])
-        df['Subjectivity'] = df['Content'].apply(lambda x: TextBlob(x).sentiment.subjectivity)
-        df['Polarity'] = df['Content'].apply(lambda x: TextBlob(x).sentiment.polarity)
-        df['Polarity_Label'] = df['Polarity'].apply(lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
-        df['Subjectivity_Label'] = df['Subjectivity'].apply(lambda x: 'High' if x > 0.5 else 'Low')
-        # Generate Plots with Safe Plotting
-        sentiment_plot = safe_plot(lambda: df['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
-        subjectivity_plot = safe_plot(lambda: df['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
-        wordcloud = word_cloud_generator(Manifesto.name, text_Party_processed)
@@ -244,55 +306,388 @@
-        error_msg = f"Critical error: {str(e)}"
-# --- Gradio Interface ---
-Search_txt = "text"
-filePdf = "file"
-        with gr.Column():
-            file_input = gr.File(label="Upload Manifesto PDF", file_types=[".pdf"])
-            search_input = gr.Textbox(label="Search Term", placeholder="Enter a term to search in the manifesto")
-            submit_btn = gr.Button("Analyze Manifesto")
-        with gr.TabItem("Summary"): gr.Textbox(label='LLM Based Summary', lines=10)
-        with gr.TabItem("Search Results"): gr.Textbox(label='Context Based Search')
-        with gr.TabItem("Key Topics"): gr.Label(label="Most Relevant Topics (LLM Enhanced)")
-            with gr.Row():
-                gr.Image(label='Sentiment Analysis'), gr.Image(label='Subjectivity Analysis')
-            with gr.Row():
-                gr.Image(label='Word Cloud'), gr.Image(label='Frequency Distribution')
-            gr.Image(label='Dispersion Plot')
-            gr.Textbox(label='Context Based Search'),
-            gr.Label(label="Most Relevant Topics (LLM Enhanced)"),
-            gr.Image(label='Sentiment Analysis'),
-            gr.Image(label='Subjectivity Analysis'),
-            gr.Image(label='Word Cloud'),
-            gr.Image(label='Frequency Distribution'),
-            gr.Image(label='Dispersion Plot'),
-            gr.Textbox(label='AI-Generated Summary', lines=10)
-        ]
-        inputs=[file_input, search_input]
-demo.launch(debug=True, share=False, show_error=True)
 import contractions
 from sklearn.feature_extraction.text import TfidfVectorizer
 
 # Load environment variables
 load_dotenv()
 
+# Download NLTK resources (Ensure this runs once or handle caching)
+# nltk.download(['stopwords', 'wordnet', 'words'])
+# nltk.download('punkt')
+# nltk.download('punkt_tab')
+
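One way to make the commented-out downloads safe to re-enable is to fetch each NLTK resource only when it is missing; a minimal sketch (the lookup paths are the standard NLTK data paths, and this is not what the committed code does):

import nltk

# Download each corpus/tokenizer only if it is not already cached locally.
for resource, path in [('stopwords', 'corpora/stopwords'),
                       ('wordnet', 'corpora/wordnet'),
                       ('punkt', 'tokenizers/punkt')]:
    try:
        nltk.data.find(path)
    except LookupError:
        nltk.download(resource)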
 # Initialize Groq client
 groq_api_key = os.getenv("GROQ_API_KEY")
 groq_client = groq.Groq(api_key=groq_api_key) if groq_api_key else None
 
 # Stopwords customization
 stop_words = set(stopwords.words('english'))
+stop_words.update({'ask', 'much', 'thank', 'etc.', 'e', 'We', 'In', 'ed', 'pa', 'This', 'also', 'A', 'fu', 'To', '5', 'ing', 'er', '2'})  # Ensure stop_words is a set
 
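The switch from positional string arguments to a set literal matters here: set.update() treats each string argument as an iterable of characters, so the old call added single letters rather than whole words. A quick illustration:

words = set()
words.update('ask', 'much')    # adds characters: {'a', 's', 'k', 'm', 'u', 'c', 'h'}
words = set()
words.update({'ask', 'much'})  # adds whole words: {'ask', 'much'}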
 # --- Parsing & Preprocessing Functions ---
 def Parsing(parsed_text):
     try:
         if hasattr(parsed_text, 'name'):
             file_path = parsed_text.name
         else:
             file_path = parsed_text
+        # Ensure textract handles encoding correctly or handle errors
+        raw_party = textract.process(file_path)  # Removed encoding/method for broader compatibility
+        decoded_text = raw_party.decode('utf-8', errors='ignore')  # Decode bytes to string, handling errors
+        return clean(decoded_text)  # Pass decoded string to clean
     except Exception as e:
         print(f"Error parsing PDF: {e}")
         return f"Error parsing PDF: {e}"
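As a quick sanity check of this parsing path, a sketch assuming a local sample.pdf and a working textract PDF backend:

import textract

raw = textract.process("sample.pdf")          # bytes
text = raw.decode("utf-8", errors="ignore")   # str; undecodable bytes are dropped
print(type(raw).__name__, type(text).__name__, len(text))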
         text = text[:10000]
     try:
         completion = groq_client.chat.completions.create(
+            model="llama3-8b-8192",  # Or your preferred model
             messages=[
                 {"role": "system", "content": "You are a helpful assistant that summarizes political manifestos. Provide a concise, objective summary that captures the key policy proposals, themes, and promises in the manifesto."},
+                {"role": "user", "content": f"Please summarize the following political manifesto text in about 300-500 words, focusing on the main policy areas, promises, and themes:\n{text}"}
             ],
             temperature=0.3,
             max_tokens=800
     word_tokens_party = word_tokenize(text2Party)
     fdistance = FreqDist(word_tokens_party).most_common(10)
     mem = {x[0]: x[1] for x in fdistance}
     vectorizer = TfidfVectorizer(max_features=15, stop_words='english')
+    try:
+        tfidf_matrix = vectorizer.fit_transform(sent_tokenize(text2Party))
+        feature_names = vectorizer.get_feature_names_out()
+        tfidf_scores = {}
+        sentences = sent_tokenize(text2Party)
+        for i, word in enumerate(feature_names):
+            scores = []
+            for j in range(tfidf_matrix.shape[0]):  # Iterate through sentences
+                if i < tfidf_matrix.shape[1]:  # Check if word index is valid for this sentence vector
+                    scores.append(tfidf_matrix[j, i])
+            if scores:
+                tfidf_scores[word] = sum(scores) / len(scores)  # Average TF-IDF score across sentences
+        combined_scores = {}
+        all_words = set(list(mem.keys()) + list(tfidf_scores.keys()))
+        max_freq = max(mem.values()) if mem else 1
+        max_tfidf = max(tfidf_scores.values()) if tfidf_scores else 1
+        for word in all_words:
+            freq_score = mem.get(word, 0) / max_freq
+            tfidf_score = tfidf_scores.get(word, 0) / max_tfidf
+            combined_scores[word] = (freq_score * 0.3) + (tfidf_score * 0.7)
+        top_words = dict(sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:10])
+        return normalize(top_words)
+    except ValueError as ve:  # Handle case where TF-IDF fails (e.g., empty after processing)
+        print(f"Warning: TF-IDF failed, using only frequency: {ve}")
+        # Fallback to just normalized frequency if TF-IDF fails
+        if mem:
+            max_freq = max(mem.values())
+            return {k: v / max_freq for k, v in list(mem.items())[:10]}  # Return top 10 freq, normalized
+        else:
+            return {}
 
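To make the 0.3/0.7 weighting above concrete, a toy calculation with made-up numbers:

mem = {'tax': 12, 'health': 9}                  # raw word frequencies
tfidf_scores = {'tax': 0.42, 'health': 0.58}    # averaged TF-IDF scores
max_freq, max_tfidf = 12, 0.58
combined = {w: 0.3 * (mem.get(w, 0) / max_freq) + 0.7 * (tfidf_scores.get(w, 0) / max_tfidf)
            for w in set(mem) | set(tfidf_scores)}
# combined['health'] = 0.3*0.75 + 0.7*1.0 = 0.925; combined['tax'] = 0.3*1.0 + 0.7*(0.42/0.58) ≈ 0.807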
 def normalize(d, target=1.0):
     raw = sum(d.values())
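normalize rescales the scores so they sum to the target, keeping the Key Topics label on a comparable scale; assuming the rest of the function still applies factor = target / raw to every value, a toy check:

print(normalize({'a': 2, 'b': 3}, target=1.0))  # {'a': 0.4, 'b': 0.6}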
         plt.clf()
         func(*args, **kwargs)
         buf = BytesIO()
+        plt.savefig(buf, format='png', bbox_inches='tight')  # Add bbox_inches for better fit
         buf.seek(0)
+        img = Image.open(buf)
+        plt.close()  # Use plt.close() instead of clf for better memory management after save
+        return img
     except Exception as e:
+        print(f"Plotting error in safe_plot: {e}")
+        traceback.print_exc()  # Print traceback for debugging
+        return None  # Return None on error
 
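Usage sketch: safe_plot expects a zero-argument callable that draws onto the current Matplotlib figure, and it hands back a PIL image on success or None on failure (the pandas Series here is made-up data, not from the app):

img = safe_plot(lambda: pd.Series([3, 1, 2], index=['a', 'b', 'c']).plot(kind='bar', title='Demo'))
if img is not None:
    img.save('demo.png')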
 def fDistancePlot(text2Party):
+    def plot_func():
+        tokens = word_tokenize(text2Party)
+        if not tokens:
+            plt.text(0.5, 0.5, "No data to plot", ha='center', va='center')
+            return
+        fdist = FreqDist(tokens)
+        fdist.plot(15, title='Frequency Distribution')
+        plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels if needed
+        plt.tight_layout()
+    return safe_plot(plot_func)
 
try:
|
171 |
word_tokens_party = word_tokenize(textParty)
|
172 |
+
if not word_tokens_party:
|
173 |
+
return None
|
174 |
+
moby = Text(word_tokens_party)
|
175 |
fdistance = FreqDist(word_tokens_party)
|
176 |
+
# Get top 5 words, handle potential IndexError if less than 5 unique words
|
177 |
+
common_words = fdistance.most_common(6)
|
178 |
+
if len(common_words) < 5:
|
179 |
+
word_Lst = [word for word, _ in common_words]
|
180 |
+
else:
|
181 |
+
word_Lst = [common_words[x][0] for x in range(5)]
|
182 |
+
|
183 |
+
if not word_Lst:
|
184 |
+
return None
|
185 |
+
|
186 |
+
plt.figure(figsize=(10, 5)) # Adjust figure size
|
187 |
plt.title('Dispersion Plot')
|
188 |
moby.dispersion_plot(word_Lst)
|
189 |
plt.tight_layout()
|
190 |
buf = BytesIO()
|
191 |
+
plt.savefig(buf, format='png', bbox_inches='tight')
|
192 |
buf.seek(0)
|
193 |
img = Image.open(buf)
|
194 |
+
plt.close() # Close the figure
|
195 |
return img
|
196 |
except Exception as e:
|
197 |
print(f"Dispersion plot error: {e}")
|
198 |
+
traceback.print_exc()
|
199 |
return None
|
200 |
|
 def word_cloud_generator(parsed_text_name, text_Party):
     try:
+        # Handle case where parsed_text_name might not have .name
+        filename_lower = ""
+        if hasattr(parsed_text_name, 'name') and parsed_text_name.name:
+            filename_lower = parsed_text_name.name.lower()
+        elif isinstance(parsed_text_name, str):
+            filename_lower = parsed_text_name.lower()
+
+        mask_path = None
+        if 'bjp' in filename_lower:
             mask_path = 'bjpImg2.jpeg'
+        elif 'congress' in filename_lower:
             mask_path = 'congress3.jpeg'
+        elif 'aap' in filename_lower:
             mask_path = 'aapMain2.jpg'
+
+        # Generate word cloud
+        if text_Party.strip() == "":
+            raise ValueError("Text for word cloud is empty")
 
         if mask_path and os.path.exists(mask_path):
             orgImg = Image.open(mask_path)
+            # Ensure mask is in the right format (e.g., uint8)
+            if orgImg.mode != 'RGB':
+                orgImg = orgImg.convert('RGB')
             mask = np.array(orgImg)
+            wordcloud = WordCloud(max_words=3000, mask=mask, background_color='white').generate(text_Party)  # Added background color
         else:
+            wordcloud = WordCloud(max_words=2000, background_color='white').generate(text_Party)
+
+        plt.figure(figsize=(8, 6))  # Set figure size
+        plt.imshow(wordcloud, interpolation='bilinear')  # Use bilinear interpolation
         plt.axis("off")
+        plt.tight_layout()
         buf = BytesIO()
+        plt.savefig(buf, format='png', bbox_inches='tight')
         buf.seek(0)
+        img = Image.open(buf)
+        plt.close()  # Close the figure
+        return img
     except Exception as e:
         print(f"Word cloud error: {e}")
+        traceback.print_exc()
+        return None  # Return None on error
 
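A possible simplification, not what the committed code does: WordCloud can return a PIL image directly via to_image(), which would avoid the Matplotlib figure and BytesIO round-trip (the input string is a placeholder):

wc = WordCloud(max_words=2000, background_color='white').generate("schools taxes health jobs farmers")
img = wc.to_image()  # PIL.Image, directly usable as a Gradio image output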
 def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10, numLins=4):
     """
     Function to get all the phrases that contain the target word in a text/passage.
     """
     if not target_word or target_word.strip() == "":
         return "Please enter a search term"
     tokens = nltk.word_tokenize(tar_passage)
     text = nltk.Text(tokens)
     c = nltk.ConcordanceIndex(text.tokens, key=lambda s: s.lower())
     offsets = c.offsets(target_word)
+    if not offsets:
+        return f"Word '{target_word}' not found."
     concordance_txt = [
         text.tokens[max(0, offset - left_margin):offset + right_margin]
         for offset in offsets[:numLins]
     ]
     result = [' '.join(con_sub) for con_sub in concordance_txt]
+    return '\n'.join(result)  # Use newline for better readability in textbox
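A toy call of the concordance helper (assumes NLTK's punkt tokenizer data is available):

passage = "The government will fund schools. A new government plan supports farmers."
print(get_all_phases_containing_tar_wrd("government", passage, left_margin=3, right_margin=3))
# Each line of the result shows up to 3 tokens of context on either side of 'government'.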
 # --- Main Analysis Function ---
 def analysis(Manifesto, Search):
     try:
         if Manifesto is None:
             return "No file uploaded", {}, None, None, None, None, None, "No file uploaded"
         if Search.strip() == "":
             Search = "government"
         raw_party = Parsing(Manifesto)
         if isinstance(raw_party, str) and raw_party.startswith("Error"):
             return raw_party, {}, None, None, None, None, None, "Parsing failed"
 
         text_Party = clean_text(raw_party)
         text_Party_processed = Preprocess(text_Party)
 
+        summary = generate_summary(raw_party)  # Use raw_party for summary for more context?
 
+        # --- Sentiment Analysis ---
+        if not text_Party_processed.strip():
+            # Handle empty text after processing
+            df_dummy = pd.DataFrame({'Polarity_Label': ['Neutral'], 'Subjectivity_Label': ['Low']})
+            polarity_val = 0.0
+            subjectivity_val = 0.0
+        else:
+            polarity_val = TextBlob(text_Party_processed).sentiment.polarity
+            subjectivity_val = TextBlob(text_Party_processed).sentiment.subjectivity
+            polarity_label = 'Positive' if polarity_val > 0 else 'Negative' if polarity_val < 0 else 'Neutral'
+            subjectivity_label = 'High' if subjectivity_val > 0.5 else 'Low'
+            df_dummy = pd.DataFrame({'Polarity_Label': [polarity_label], 'Subjectivity_Label': [subjectivity_label]})
+
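For reference, a toy TextBlob call showing the two scores the block above relies on (polarity lies in [-1, 1], subjectivity in [0, 1]; the sentence is made up):

from textblob import TextBlob
sentiment = TextBlob("We promise better schools and fair taxes").sentiment
print(sentiment.polarity, sentiment.subjectivity)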
+        # --- Generate Plots with Safe Plotting ---
+        # Pass the potentially empty text and handle inside plotting functions
+        sentiment_plot = safe_plot(lambda: df_dummy['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
+        subjectivity_plot = safe_plot(lambda: df_dummy['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
         freq_plot = fDistancePlot(text_Party_processed)
         dispersion_plot = DispersionPlot(text_Party_processed)
+        wordcloud = word_cloud_generator(Manifesto, text_Party_processed)  # Pass Manifesto object itself
 
         fdist_Party = fDistance(text_Party_processed)
         searChRes = get_all_phases_containing_tar_wrd(Search, text_Party_processed)
 
         return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
 
     except Exception as e:
+        error_msg = f"Critical error in analysis function: {str(e)}"
         print(error_msg)
         traceback.print_exc()
+        # Return error messages/images in the correct order
         return error_msg, {}, None, None, None, None, None, "Analysis failed"
 
 
+# --- Gradio Interface ---
+# Use Blocks for custom layout
 with gr.Blocks(title='Manifesto Analysis') as demo:
     gr.Markdown("# Manifesto Analysis")
+
+    # Input Section
     with gr.Row():
+        with gr.Column(scale=1):  # Adjust scale if needed
+            file_input = gr.File(label="Upload Manifesto PDF", file_types=[".pdf"])
+        with gr.Column(scale=1):
+            search_input = gr.Textbox(label="Search Term", placeholder="Enter a term to search in the manifesto")
+            submit_btn = gr.Button("Analyze Manifesto", variant='primary')  # Make button prominent
+
+    # Output Section using Tabs
     with gr.Tabs():
+        # --- Summary Tab ---
+        with gr.TabItem("Summary"):
+            summary_output = gr.Textbox(label='AI-Generated Summary', lines=10, interactive=False)
+
+        # --- Search Results Tab ---
+        with gr.TabItem("Search Results"):
+            search_output = gr.Textbox(label='Context Based Search Results', lines=10, interactive=False)
+
+        # --- Key Topics Tab ---
+        with gr.TabItem("Key Topics"):
+            topics_output = gr.Label(label="Most Relevant Topics (LLM Enhanced)", num_top_classes=10)  # Show top 10
+
+        # --- Visualizations Tab ---
         with gr.TabItem("Visualizations"):
+            # Use Rows and Columns for better arrangement
+            with gr.Row():  # Row 1: Sentiment & Subjectivity
+                with gr.Column():
+                    sentiment_output = gr.Image(label='Sentiment Analysis', interactive=False, height=400)  # Set height
+                with gr.Column():
+                    subjectivity_output = gr.Image(label='Subjectivity Analysis', interactive=False, height=400)
+
+            with gr.Row():  # Row 2: Word Cloud & Frequency
+                with gr.Column():
+                    wordcloud_output = gr.Image(label='Word Cloud', interactive=False, height=400)
+                with gr.Column():
+                    freq_output = gr.Image(label='Frequency Distribution', interactive=False, height=400)
 
+            with gr.Row():  # Row 3: Dispersion Plot (Full width)
+                with gr.Column():
+                    dispersion_output = gr.Image(label='Dispersion Plot', interactive=False, height=400)  # Adjust height as needed
+
+    # --- Link Button Click to Function and Outputs ---
+    # Ensure the order of outputs matches the function return order
     submit_btn.click(
         fn=analysis,
         inputs=[file_input, search_input],
         outputs=[
+            search_output,        # 1
+            topics_output,        # 2
+            sentiment_output,     # 3
+            subjectivity_output,  # 4
+            wordcloud_output,     # 5
+            freq_output,          # 6
+            dispersion_output,    # 7
+            summary_output        # 8
+        ],
+        concurrency_limit=1  # Limit concurrent analyses if needed
     )
 
+    # --- Examples ---
     gr.Examples(
         examples=[
             ["Example/AAP_Manifesto_2019.pdf", "government"],
             ["Example/Bjp_Manifesto_2019.pdf", "environment"],
             ["Example/Congress_Manifesto_2019.pdf", "safety"]
         ],
+        inputs=[file_input, search_input],
+        outputs=[search_output, topics_output, sentiment_output, subjectivity_output, wordcloud_output, freq_output, dispersion_output, summary_output],  # Link examples to outputs
+        fn=analysis  # Run analysis on example click
     )
 
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(debug=True, share=False, show_error=True)
+
+
# import random
|
397 |
+
# import matplotlib.pyplot as plt
|
398 |
+
# import nltk
|
399 |
+
# from nltk.tokenize import word_tokenize, sent_tokenize
|
400 |
+
# from nltk.corpus import stopwords
|
401 |
+
# from nltk.stem import WordNetLemmatizer
|
402 |
+
# from nltk.text import Text
|
403 |
+
# from nltk.probability import FreqDist
|
404 |
+
# from cleantext import clean
|
405 |
+
# import textract
|
406 |
+
# import urllib.request
|
407 |
+
# from io import BytesIO
|
408 |
+
# import sys
|
409 |
+
# import pandas as pd
|
410 |
+
# import cv2
|
411 |
+
# import re
|
412 |
+
# from wordcloud import WordCloud, ImageColorGenerator
|
413 |
+
# from textblob import TextBlob
|
414 |
+
# from PIL import Image
|
415 |
+
# import os
|
416 |
+
# import gradio as gr
|
417 |
+
# from dotenv import load_dotenv
|
418 |
+
# import groq
|
419 |
+
# import json
|
420 |
+
# import traceback
|
421 |
+
# import numpy as np
|
422 |
+
# import unidecode
|
423 |
+
# import contractions
|
424 |
+
# from sklearn.feature_extraction.text import TfidfVectorizer
|
425 |
+
|
426 |
+
|
427 |
+
# # Load environment variables
|
428 |
+
# load_dotenv()
|
429 |
+
|
430 |
+
# # Download NLTK resources
|
431 |
+
# nltk.download(['stopwords', 'wordnet', 'words'])
|
432 |
+
# nltk.download('punkt')
|
433 |
+
# nltk.download('punkt_tab')
|
434 |
+
# # Initialize Groq client
|
435 |
+
# groq_api_key = os.getenv("GROQ_API_KEY")
|
436 |
+
# groq_client = groq.Groq(api_key=groq_api_key) if groq_api_key else None
|
437 |
+
|
438 |
+
# # Stopwords customization
|
439 |
+
# stop_words = set(stopwords.words('english'))
|
440 |
+
# stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This','also', 'A', 'fu','To','5','ing', 'er', '2')
|
441 |
+
|
442 |
+
# # --- Parsing & Preprocessing Functions ---
|
443 |
+
# def Parsing(parsed_text):
|
444 |
+
# try:
|
445 |
+
# if hasattr(parsed_text, 'name'):
|
446 |
+
# file_path = parsed_text.name
|
447 |
+
# else:
|
448 |
+
# file_path = parsed_text
|
449 |
+
# raw_party = textract.process(file_path, encoding='ascii', method='pdfminer')
|
450 |
+
# return clean(raw_party)
|
451 |
+
# except Exception as e:
|
452 |
+
# print(f"Error parsing PDF: {e}")
|
453 |
+
# return f"Error parsing PDF: {e}"
|
454 |
+
|
455 |
+
# def clean_text(text):
|
456 |
+
# text = text.encode("ascii", errors="ignore").decode("ascii")
|
457 |
+
# text = unidecode.unidecode(text)
|
458 |
+
# text = contractions.fix(text)
|
459 |
+
# text = re.sub(r"\n", " ", text)
|
460 |
+
# text = re.sub(r"\t", " ", text)
|
461 |
+
# text = re.sub(r"/ ", " ", text)
|
462 |
+
# text = text.strip()
|
463 |
+
# text = re.sub(" +", " ", text).strip()
|
464 |
+
# text = [word for word in text.split() if word not in stop_words]
|
465 |
+
# return ' '.join(text)
|
466 |
+
|
467 |
+
# def Preprocess(textParty):
|
468 |
+
# text1Party = re.sub('[^A-Za-z0-9]+', ' ', textParty)
|
469 |
+
# pattern = re.compile(r'\b(' + r'|'.join(stopwords.words('english')) + r')\b\s*')
|
470 |
+
# text2Party = pattern.sub('', text1Party)
|
471 |
+
# return text2Party
|
472 |
+
|
473 |
+
# # --- Core Analysis Functions ---
|
474 |
+
# def generate_summary(text):
|
475 |
+
# if not groq_client:
|
476 |
+
# return "Summarization is not available. Please set up your GROQ_API_KEY in the .env file."
|
477 |
+
# if len(text) > 10000:
|
478 |
+
# text = text[:10000]
|
479 |
+
# try:
|
480 |
+
# completion = groq_client.chat.completions.create(
|
481 |
+
# model="llama3-8b-8192",
|
482 |
+
# messages=[
|
483 |
+
# {"role": "system", "content": "You are a helpful assistant that summarizes political manifestos. Provide a concise, objective summary that captures the key policy proposals, themes, and promises in the manifesto."},
|
484 |
+
# {"role": "user", "content": f"Please summarize the following political manifesto text in about 300-500 words, focusing on the main policy areas, promises, and themes:\n\n{text}"}
|
485 |
+
# ],
|
486 |
+
# temperature=0.3,
|
487 |
+
# max_tokens=800
|
488 |
+
# )
|
489 |
+
# return completion.choices[0].message.content
|
490 |
+
# except Exception as e:
|
491 |
+
# return f"Error generating summary: {str(e)}"
|
492 |
+
|
493 |
+
# def fDistance(text2Party):
|
494 |
+
# word_tokens_party = word_tokenize(text2Party)
|
495 |
+
# fdistance = FreqDist(word_tokens_party).most_common(10)
|
496 |
+
# mem = {x[0]: x[1] for x in fdistance}
|
497 |
+
|
498 |
+
# vectorizer = TfidfVectorizer(max_features=15, stop_words='english')
|
499 |
+
# tfidf_matrix = vectorizer.fit_transform(sent_tokenize(text2Party))
|
500 |
+
# feature_names = vectorizer.get_feature_names_out()
|
501 |
+
|
502 |
+
# tfidf_scores = {}
|
503 |
+
# for i, word in enumerate(feature_names):
|
504 |
+
# scores = [tfidf_matrix[j, i] for j in range(len(sent_tokenize(text2Party))) if i < tfidf_matrix[j].shape[1]]
|
505 |
+
# if scores:
|
506 |
+
# tfidf_scores[word] = sum(scores) / len(scores)
|
507 |
+
|
508 |
+
# combined_scores = {}
|
509 |
+
# for word in set(list(mem.keys()) + list(tfidf_scores.keys())):
|
510 |
+
# freq_score = mem.get(word, 0) / max(mem.values()) if mem else 0
|
511 |
+
# tfidf_score = tfidf_scores.get(word, 0) / max(tfidf_scores.values()) if tfidf_scores else 0
|
512 |
+
# combined_scores[word] = (freq_score * 0.3) + (tfidf_score * 0.7)
|
513 |
+
|
514 |
+
# top_words = dict(sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:10])
|
515 |
+
# return normalize(top_words)
|
516 |
+
|
517 |
+
# def normalize(d, target=1.0):
|
518 |
+
# raw = sum(d.values())
|
519 |
+
# factor = target / raw if raw != 0 else 0
|
520 |
+
# return {key: value * factor for key, value in d.items()}
|
521 |
+
|
522 |
+
# # --- Visualization Functions with Error Handling ---
|
523 |
+
# def safe_plot(func, *args, **kwargs):
|
524 |
+
# try:
|
525 |
+
# plt.clf()
|
526 |
+
# func(*args, **kwargs)
|
527 |
+
# buf = BytesIO()
|
528 |
+
# plt.savefig(buf, format='png')
|
529 |
+
# buf.seek(0)
|
530 |
+
# return Image.open(buf)
|
531 |
+
# except Exception as e:
|
532 |
+
# print(f"Plotting error: {e}")
|
533 |
+
# return None
|
534 |
+
|
535 |
+
# def fDistancePlot(text2Party):
|
536 |
+
# return safe_plot(lambda: FreqDist(word_tokenize(text2Party)).plot(15, title='Frequency Distribution'))
|
537 |
+
|
538 |
+
# def DispersionPlot(textParty):
|
539 |
+
# try:
|
540 |
+
# word_tokens_party = word_tokenize(textParty)
|
541 |
+
# moby = Text(word_tokens_party) # Ensure Text is imported
|
542 |
+
# fdistance = FreqDist(word_tokens_party)
|
543 |
+
# word_Lst = [fdistance.most_common(6)[x][0] for x in range(5)]
|
544 |
+
# plt.figure(figsize=(4, 3))
|
545 |
+
# plt.title('Dispersion Plot')
|
546 |
+
# moby.dispersion_plot(word_Lst)
|
547 |
+
# plt.tight_layout()
|
548 |
+
# buf = BytesIO()
|
549 |
+
# plt.savefig(buf, format='png')
|
550 |
+
# buf.seek(0)
|
551 |
+
# img = Image.open(buf)
|
552 |
+
# plt.clf()
|
553 |
+
# return img
|
554 |
+
# except Exception as e:
|
555 |
+
# print(f"Dispersion plot error: {e}")
|
556 |
+
# return None
|
557 |
+
|
558 |
+
# def word_cloud_generator(parsed_text_name, text_Party):
|
559 |
+
# try:
|
560 |
+
# parsed = parsed_text_name.lower()
|
561 |
+
# if 'bjp' in parsed:
|
562 |
+
# mask_path = 'bjpImg2.jpeg'
|
563 |
+
# elif 'congress' in parsed:
|
564 |
+
# mask_path = 'congress3.jpeg'
|
565 |
+
# elif 'aap' in parsed:
|
566 |
+
# mask_path = 'aapMain2.jpg'
|
567 |
+
# else:
|
568 |
+
# mask_path = None
|
569 |
+
|
570 |
+
# if mask_path and os.path.exists(mask_path):
|
571 |
+
# orgImg = Image.open(mask_path)
|
572 |
+
# mask = np.array(orgImg)
|
573 |
+
# wordcloud = WordCloud(max_words=3000, mask=mask).generate(text_Party)
|
574 |
+
# plt.imshow(wordcloud)
|
575 |
+
# else:
|
576 |
+
# wordcloud = WordCloud(max_words=2000).generate(text_Party)
|
577 |
+
# plt.imshow(wordcloud)
|
578 |
+
# plt.axis("off")
|
579 |
+
# buf = BytesIO()
|
580 |
+
# plt.savefig(buf, format='png')
|
581 |
+
# buf.seek(0)
|
582 |
+
# return Image.open(buf)
|
583 |
+
# except Exception as e:
|
584 |
+
# print(f"Word cloud error: {e}")
|
585 |
+
# return None
|
586 |
+
|
587 |
+
# def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10, numLins=4):
|
588 |
+
# """
|
589 |
+
# Function to get all the phrases that contain the target word in a text/passage.
|
590 |
+
# """
|
591 |
+
# if not target_word or target_word.strip() == "":
|
592 |
+
# return "Please enter a search term"
|
593 |
+
|
594 |
+
# tokens = nltk.word_tokenize(tar_passage)
|
595 |
+
# text = nltk.Text(tokens)
|
596 |
+
# c = nltk.ConcordanceIndex(text.tokens, key=lambda s: s.lower())
|
597 |
+
# offsets = c.offsets(target_word)
|
598 |
+
|
599 |
+
# concordance_txt = [
|
600 |
+
# text.tokens[max(0, offset - left_margin):offset + right_margin]
|
601 |
+
# for offset in offsets[:numLins]
|
602 |
+
# ]
|
603 |
+
|
604 |
+
# result = [' '.join(con_sub) for con_sub in concordance_txt]
|
605 |
+
# return '\n'.join(result)
|
606 |
+
|
607 |
+
# # --- Main Analysis Function ---
|
608 |
+
# def analysis(Manifesto, Search):
|
609 |
+
# try:
|
610 |
+
# if Manifesto is None:
|
611 |
+
# return "No file uploaded", {}, None, None, None, None, None, "No file uploaded"
|
612 |
+
# if Search.strip() == "":
|
613 |
+
# Search = "government"
|
614 |
+
|
615 |
+
# raw_party = Parsing(Manifesto)
|
616 |
+
# if isinstance(raw_party, str) and raw_party.startswith("Error"):
|
617 |
+
# return raw_party, {}, None, None, None, None, None, "Parsing failed"
|
618 |
+
|
619 |
+
# text_Party = clean_text(raw_party)
|
620 |
+
# text_Party_processed = Preprocess(text_Party)
|
621 |
+
# summary = generate_summary(raw_party)
|
622 |
+
|
623 |
+
# df = pd.DataFrame([{'Content': text_Party_processed}], columns=['Content'])
|
624 |
+
# df['Subjectivity'] = df['Content'].apply(lambda x: TextBlob(x).sentiment.subjectivity)
|
625 |
+
# df['Polarity'] = df['Content'].apply(lambda x: TextBlob(x).sentiment.polarity)
|
626 |
+
# df['Polarity_Label'] = df['Polarity'].apply(lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
|
627 |
+
# df['Subjectivity_Label'] = df['Subjectivity'].apply(lambda x: 'High' if x > 0.5 else 'Low')
|
628 |
+
|
629 |
+
# # Generate Plots with Safe Plotting
|
630 |
+
# sentiment_plot = safe_plot(lambda: df['Polarity_Label'].value_counts().plot(kind='bar', color="#FF9F45", title='Sentiment Analysis'))
|
631 |
+
# subjectivity_plot = safe_plot(lambda: df['Subjectivity_Label'].value_counts().plot(kind='bar', color="#B667F1", title='Subjectivity Analysis'))
|
632 |
+
# freq_plot = fDistancePlot(text_Party_processed)
|
633 |
+
# dispersion_plot = DispersionPlot(text_Party_processed)
|
634 |
+
# wordcloud = word_cloud_generator(Manifesto.name, text_Party_processed)
|
635 |
+
|
636 |
+
# fdist_Party = fDistance(text_Party_processed)
|
637 |
+
# searChRes = get_all_phases_containing_tar_wrd(Search, text_Party_processed)
|
638 |
+
|
639 |
+
# return searChRes, fdist_Party, sentiment_plot, subjectivity_plot, wordcloud, freq_plot, dispersion_plot, summary
|
640 |
+
|
641 |
+
# except Exception as e:
|
642 |
+
# error_msg = f"Critical error: {str(e)}"
|
643 |
+
# print(error_msg)
|
644 |
+
# traceback.print_exc()
|
645 |
+
# return error_msg, {}, None, None, None, None, None, "Analysis failed"
|
646 |
+
|
647 |
+
# # --- Gradio Interface ---
|
648 |
+
# Search_txt = "text"
|
649 |
+
# filePdf = "file"
|
650 |
+
|
651 |
+
# with gr.Blocks(title='Manifesto Analysis') as demo:
|
652 |
+
# gr.Markdown("# Manifesto Analysis")
|
653 |
+
# with gr.Row():
|
654 |
+
# with gr.Column():
|
655 |
+
# file_input = gr.File(label="Upload Manifesto PDF", file_types=[".pdf"])
|
656 |
+
# search_input = gr.Textbox(label="Search Term", placeholder="Enter a term to search in the manifesto")
|
657 |
+
# submit_btn = gr.Button("Analyze Manifesto")
|
658 |
+
# with gr.Tabs():
|
659 |
+
# with gr.TabItem("Summary"): gr.Textbox(label='LLM Based Summary', lines=10)
|
660 |
+
# with gr.TabItem("Search Results"): gr.Textbox(label='Context Based Search')
|
661 |
+
# with gr.TabItem("Key Topics"): gr.Label(label="Most Relevant Topics (LLM Enhanced)")
|
662 |
+
# with gr.TabItem("Visualizations"):
|
663 |
+
# with gr.Row():
|
664 |
+
# gr.Image(label='Sentiment Analysis'), gr.Image(label='Subjectivity Analysis')
|
665 |
+
# with gr.Row():
|
666 |
+
# gr.Image(label='Word Cloud'), gr.Image(label='Frequency Distribution')
|
667 |
+
# gr.Image(label='Dispersion Plot')
|
668 |
+
|
669 |
+
# submit_btn.click(
|
670 |
+
# fn=analysis,
|
671 |
+
# inputs=[file_input, search_input],
|
672 |
+
# outputs=[
|
673 |
+
# gr.Textbox(label='Context Based Search'),
|
674 |
+
# gr.Label(label="Most Relevant Topics (LLM Enhanced)"),
|
675 |
+
# gr.Image(label='Sentiment Analysis'),
|
676 |
+
# gr.Image(label='Subjectivity Analysis'),
|
677 |
+
# gr.Image(label='Word Cloud'),
|
678 |
+
# gr.Image(label='Frequency Distribution'),
|
679 |
+
# gr.Image(label='Dispersion Plot'),
|
680 |
+
# gr.Textbox(label='AI-Generated Summary', lines=10)
|
681 |
+
# ]
|
682 |
+
# )
|
683 |
+
|
684 |
+
# gr.Examples(
|
685 |
+
# examples=[
|
686 |
+
# ["Example/AAP_Manifesto_2019.pdf", "government"],
|
687 |
+
# ["Example/Bjp_Manifesto_2019.pdf", "environment"],
|
688 |
+
# ["Example/Congress_Manifesto_2019.pdf", "safety"]
|
689 |
+
# ],
|
690 |
+
# inputs=[file_input, search_input]
|
691 |
+
# )
|
692 |
+
|
693 |
+
# demo.launch(debug=True, share=False, show_error=True)
|