TabasumDev committed
Commit b8ea32a Β· verified Β· 1 Parent(s): 838eb5c

Update app.py

Files changed (1)
  1. app.py +136 -123
app.py CHANGED
@@ -262,137 +262,150 @@
 # if __name__ == '__main__':
 #     main()
 
 import streamlit as st
-import os
-import re
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from PyPDF2 import PdfReader
-from peft import get_peft_model, LoraConfig, TaskType
-
-# βœ… Force CPU execution
-device = torch.device("cpu")
-
-# πŸ”Ή Load IBM Granite Model (CPU-Compatible)
-MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
-
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    device_map="cpu",          # Force CPU execution
-    torch_dtype=torch.float32  # Use float32 since Hugging Face runs on CPU
-)
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-
-# πŸ”Ή Apply LoRA Fine-Tuning Configuration
-lora_config = LoraConfig(
-    r=8,
-    lora_alpha=32,
-    target_modules=["q_proj", "v_proj"],
-    lora_dropout=0.1,
-    bias="none",
-    task_type=TaskType.CAUSAL_LM
-)
-model = get_peft_model(model, lora_config)
-model.eval()
-
-# πŸ›  Function to Read & Extract Text from PDFs
-def read_files(file):
-    file_context = ""
-    reader = PdfReader(file)
-
-    for page in reader.pages:
-        text = page.extract_text()
-        if text:
-            file_context += text + "\n"
-
-    return file_context.strip()
-
-# πŸ›  Function to Format AI Prompts
-def format_prompt(system_msg, user_msg, file_context=""):
-    if file_context:
-        system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
-    return [
-        {"role": "system", "content": system_msg},
-        {"role": "user", "content": user_msg}
-    ]
-
-# πŸ›  Function to Generate AI Responses
-def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
-
-    with torch.no_grad():
-        output = model.generate(
-            **model_inputs,
-            max_new_tokens=max_tokens,
-            do_sample=True,
-            top_p=top_p,
-            temperature=temperature,
-            num_return_sequences=1,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-    return tokenizer.decode(output[0], skip_special_tokens=True)
-
-# πŸ›  Function to Clean AI Output
-def post_process(text):
-    cleaned = re.sub(r'ζˆ₯+', '', text)  # Remove unwanted symbols
-    lines = cleaned.splitlines()
-    unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
-    return "\n".join(unique_lines)
-
-# πŸ›  Function to Handle RAG with IBM Granite & Streamlit
-def granite_simple(prompt, file):
-    file_context = read_files(file) if file else ""
-
-    system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-
-    messages = format_prompt(system_message, prompt, file_context)
-    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
-    response = generate_response(input_text)
-    return post_process(response)
-
-# πŸ”Ή Streamlit UI
-def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ")
-
-    st.title("πŸ“œ AI-Powered Contract Analysis Tool")
-    st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
-
-    # πŸ”Ή Sidebar Settings
-    with st.sidebar:
-        st.header("βš™οΈ Settings")
-        max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
-        top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
-        temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
-
-    # πŸ”Ή File Upload Section
-    uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
-
-    # βœ… Ensure file upload message is displayed
-    if uploaded_file is not None:
-        st.session_state["uploaded_file"] = uploaded_file  # Persist file in session state
-        st.success("βœ… File uploaded successfully!")
-        st.write("Click the button below to analyze the contract.")
-
-    # Force button to always render
-    st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
-
-    if st.button("πŸ” Analyze Document"):
-        with st.spinner("Analyzing contract document... ⏳"):
-            final_answer = granite_simple(
-                "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
-                uploaded_file
-            )
-
-            # πŸ”Ή Display Analysis Result
-            st.subheader("πŸ“‘ Analysis Result")
-            st.write(final_answer)
-
-# πŸ”₯ Run Streamlit App
-if __name__ == '__main__':
-    main()
+
+
+
+st.title("File Upload Debugging")
+
+uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+
+if uploaded_file:
+    st.success(f"File uploaded: {uploaded_file.name}")
+    st.write(f"File Size: {uploaded_file.size / 1024:.2f} KB")
+
+# import streamlit as st
+# import os
+# import re
+# import torch
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# from PyPDF2 import PdfReader
+# from peft import get_peft_model, LoraConfig, TaskType
+
+# # βœ… Force CPU execution
+# device = torch.device("cpu")
+
+# # πŸ”Ή Load IBM Granite Model (CPU-Compatible)
+# MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
+
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_NAME,
+#     device_map="cpu",          # Force CPU execution
+#     torch_dtype=torch.float32  # Use float32 since Hugging Face runs on CPU
+# )
+
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+# # πŸ”Ή Apply LoRA Fine-Tuning Configuration
+# lora_config = LoraConfig(
+#     r=8,
+#     lora_alpha=32,
+#     target_modules=["q_proj", "v_proj"],
+#     lora_dropout=0.1,
+#     bias="none",
+#     task_type=TaskType.CAUSAL_LM
+# )
+# model = get_peft_model(model, lora_config)
+# model.eval()
+
+# # πŸ›  Function to Read & Extract Text from PDFs
+# def read_files(file):
+#     file_context = ""
+#     reader = PdfReader(file)
+
+#     for page in reader.pages:
+#         text = page.extract_text()
+#         if text:
+#             file_context += text + "\n"
+
+#     return file_context.strip()
+
+# # πŸ›  Function to Format AI Prompts
+# def format_prompt(system_msg, user_msg, file_context=""):
+#     if file_context:
+#         system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
+#     return [
+#         {"role": "system", "content": system_msg},
+#         {"role": "user", "content": user_msg}
+#     ]
+
+# # πŸ›  Function to Generate AI Responses
+# def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
+#     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
+
+#     with torch.no_grad():
+#         output = model.generate(
+#             **model_inputs,
+#             max_new_tokens=max_tokens,
+#             do_sample=True,
+#             top_p=top_p,
+#             temperature=temperature,
+#             num_return_sequences=1,
+#             pad_token_id=tokenizer.eos_token_id
+#         )
+
+#     return tokenizer.decode(output[0], skip_special_tokens=True)
+
+# # πŸ›  Function to Clean AI Output
+# def post_process(text):
+#     cleaned = re.sub(r'ζˆ₯+', '', text)  # Remove unwanted symbols
+#     lines = cleaned.splitlines()
+#     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+#     return "\n".join(unique_lines)
+
+# # πŸ›  Function to Handle RAG with IBM Granite & Streamlit
+# def granite_simple(prompt, file):
+#     file_context = read_files(file) if file else ""
+
+#     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+
+#     messages = format_prompt(system_message, prompt, file_context)
+#     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+#     response = generate_response(input_text)
+#     return post_process(response)
+
+# # πŸ”Ή Streamlit UI
+# def main():
+#     st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ")
+
+#     st.title("πŸ“œ AI-Powered Contract Analysis Tool")
+#     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
+
+#     # πŸ”Ή Sidebar Settings
+#     with st.sidebar:
+#         st.header("βš™οΈ Settings")
+#         max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
+#         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
+#         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
+
+#     # πŸ”Ή File Upload Section
+#     uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
+
+#     # βœ… Ensure file upload message is displayed
+#     if uploaded_file is not None:
+#         st.session_state["uploaded_file"] = uploaded_file  # Persist file in session state
+#         st.success("βœ… File uploaded successfully!")
+#         st.write("Click the button below to analyze the contract.")
+
+#     # Force button to always render
+#     st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
+
+#     if st.button("πŸ” Analyze Document"):
+#         with st.spinner("Analyzing contract document... ⏳"):
+#             final_answer = granite_simple(
+#                 "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
+#                 uploaded_file
+#             )
+
+#             # πŸ”Ή Display Analysis Result
+#             st.subheader("πŸ“‘ Analysis Result")
+#             st.write(final_answer)
+
+# # πŸ”₯ Run Streamlit App
+# if __name__ == '__main__':
+#     main()