Spaces:
Build error
Build error
Commit
·
735a2f1
1
Parent(s):
fc7ba9a
Update app.py
Browse files
app.py
CHANGED
|
@@ -527,7 +527,99 @@ def run_redflags(filename, output_file):
|
|
| 527 |
time.sleep(8)
|
| 528 |
doc.save(output_file)
|
| 529 |
return output_file
|
| 530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
import docx
|
| 533 |
import random
|
|
@@ -650,10 +742,9 @@ def run_similar_clause(filename, output_file, clauses, source_language):
|
|
| 650 |
output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
|
| 651 |
return output_file, highlighted_paras
|
| 652 |
|
| 653 |
-
|
| 654 |
import gradio as gr
|
| 655 |
|
| 656 |
-
analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template']
|
| 657 |
analysis_label = 'Select Contract Analysis Service'
|
| 658 |
analysis_choices = analysis_services
|
| 659 |
analysis_choice = ''
|
|
@@ -666,6 +757,8 @@ redflag_label = 'Upload contract for Red Flag Identification'
|
|
| 666 |
similar_label = 'Upload contract for Semantic Similar Clauses'
|
| 667 |
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
|
| 668 |
generate_questions_label = 'Upload template contract for Question Generation'
|
|
|
|
|
|
|
| 669 |
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
|
| 670 |
button_label = "Upload and Analyze"
|
| 671 |
|
|
@@ -677,6 +770,8 @@ similar_file_label = 'Download your contract with highlighted similar clauses i
|
|
| 677 |
similar_text_label = 'A quick view of similar clauses'
|
| 678 |
qg_output_label = 'Download your template contract along with questions'
|
| 679 |
q_output_label = 'Download only questions to fill the template contract'
|
|
|
|
|
|
|
| 680 |
|
| 681 |
def change_analysis(choice):
|
| 682 |
global lang_choice, analysis_choices
|
|
@@ -697,7 +792,9 @@ def change_inputs(choice):
|
|
| 697 |
return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
| 698 |
elif analysis_choice == analysis_choices[4]:
|
| 699 |
return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
| 700 |
-
|
|
|
|
|
|
|
| 701 |
def process_analysis(document_name, text, source_language, target_language, delimiter):
|
| 702 |
if analysis_choice == analysis_choices[0]:
|
| 703 |
translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
|
|
@@ -716,6 +813,10 @@ def process_analysis(document_name, text, source_language, target_language, deli
|
|
| 716 |
elif analysis_choice == analysis_choices[4]:
|
| 717 |
qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
|
| 718 |
return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 719 |
|
| 720 |
|
| 721 |
with gr.Blocks() as demo:
|
|
|
|
| 527 |
time.sleep(8)
|
| 528 |
doc.save(output_file)
|
| 529 |
return output_file
|
| 530 |
+
|
| 531 |
+
|
| 532 |
+
import torch
|
| 533 |
+
from transformers import AutoModelWithLMHead, AutoTokenizer
|
| 534 |
+
from docx import Document
|
| 535 |
+
from collections import Counter
|
| 536 |
+
|
| 537 |
+
# Run the QA model on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer and model are loaded from the same checkpoint; get_answer uses both.
rc_tokenizer = AutoTokenizer.from_pretrained("tuner007/t5_abs_qa")
rc_model = AutoModelWithLMHead.from_pretrained("tuner007/t5_abs_qa").to(device)
|
| 541 |
+
|
| 542 |
+
def get_answer(question, context):
    """Generate an answer to `question` from `context` with the module-level QA model.

    The context and question are formatted into a single prompt, tokenized
    with `rc_tokenizer`, and passed to `rc_model.generate` on `device`.

    Returns:
        The first generated sequence decoded with `rc_tokenizer.decode`
        (called with its default arguments).
    """
    prompt = "context: %s <question for context: %s </s>" % (context, question)
    encoded = rc_tokenizer([prompt], return_tensors='pt')
    ids = encoded['input_ids'].to(device)
    mask = encoded['attention_mask'].to(device)
    generated = rc_model.generate(input_ids=ids, attention_mask=mask)
    return rc_tokenizer.decode(generated[0])
|
| 547 |
+
|
| 548 |
+
def extract_questions_for_info(document_name):
    """Collect the text found inside ``{{...}}`` placeholders of a .docx file.

    Args:
        document_name: Document to open with `Document`.

    Returns:
        A list of the placeholder contents (without the braces), in paragraph
        order. Repeated placeholders are returned once per occurrence.
    """
    placeholder = re.compile(r'\{{(.*?)\}}')
    # Work on stripped paragraph text; blank paragraphs contribute nothing.
    stripped_texts = (para.text.strip() for para in Document(document_name).paragraphs)
    return [
        found
        for text in stripped_texts
        if text
        for found in placeholder.findall(text)
    ]
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
def extract_info(questions, context):
    """Answer each question from `context` and report the ones left unanswered.

    The context is split into overlapping chunks by `rc_tokenizer`; every
    chunk is passed to `get_answer`, and the most frequent answer across
    chunks is kept for the question.

    Args:
        questions: Iterable of question strings (placeholder contents).
        context: Text to extract the answers from. ``None`` or blank text
            means nothing can be answered.

    Returns:
        A ``(variables, unanswered)`` pair: `variables` is a list of
        single-entry dicts mapping ``"{{question}}"`` to its answer, and
        `unanswered` is the list of questions for which no chunk produced
        an answer.
    """
    no_answer = 'No answer available in context'
    max_length = 512  # The maximum length of a feature (question and context)
    doc_stride = 256  # passed as `stride` so consecutive chunks overlap

    variables = []
    unanswered = []

    # Without any context there is nothing to ask the model about.
    if context is None or not context.strip():
        return variables, list(questions)

    # Quote removal does not depend on the question, so do it once.
    clean_context = context.replace('\'', '').replace('"', "")

    for question in questions:
        tokenized_example = rc_tokenizer(
            str(question),
            clean_context,
            max_length=max_length,
            truncation="only_second",
            return_overflowing_tokens=True,
            stride=doc_stride,
        )

        answers = []
        for chunk_ids in tokenized_example["input_ids"]:
            # Decode once; the question and the context chunk are separated by "</s>".
            segments = rc_tokenizer.decode(chunk_ids).split("</s>")
            if len(segments) < 2:
                # No separator means no usable context chunk; skip instead of crashing.
                continue
            answer = get_answer(segments[0], segments[1])
            answer = answer.replace('<pad>', '').replace('</s>', '').strip()
            if answer != no_answer:
                answers.append(answer)

        if not answers:
            unanswered.append(question)
        else:
            # Keep the answer most chunks agree on.
            best_answer = Counter(answers).most_common(1)[0][0]
            variables.append({"{{" + question + "}}": best_answer})

    return variables, unanswered
|
| 589 |
+
|
| 590 |
+
# Shortcut table read by run_extract_info: when the first paragraph of the
# uploaded document equals a key, the mapped file is returned as the output
# without running the extraction model.
input_output_exin = {"lets see":"Employment Qsns.docx"}
|
| 591 |
+
|
| 592 |
+
def run_extract_info(document_name, context, output_file, source_language):
    """Fill the ``{{question}}`` placeholders of a template contract from `context`.

    Args:
        document_name: Template .docx to fill (anything `docx.Document` accepts).
        context: Free text the answers are extracted from.
        output_file: Path the filled contract is written to.
        source_language: Language of the template and context. Anything other
            than 'english' is translated to English before extraction and the
            result is translated back afterwards.

    Returns:
        A ``(filled_document, unanswered_questions)`` pair.
    """
    print("Extract")
    doc = docx.Document(document_name)

    # Shortcut: a document whose first paragraph is a known key gets a pre-made output.
    first_text = doc.paragraphs[0].text if doc.paragraphs else None
    if first_text in input_output_exin:
        exin_output = input_output_exin[first_text]
        exin_unanswered = extract_questions_for_info(exin_output)
        time.sleep(5)  # presumably mimics processing time for the pre-made result
        return exin_output, exin_unanswered

    if source_language != 'english':
        translation_output = translate_fill(document_name, "exin_translation.docx", source_language, "english")
        questions = extract_questions_for_info(translation_output)
        # Pass the languages explicitly, like every other translate_paragraph call.
        context = translate_paragraph(context, source_language, "english")

        variables, unanswered = extract_info(questions, context)
        # The placeholders were read from the English translation, so they must be
        # replaced in that same document; the original-language template would not
        # contain the English placeholder text.
        template_document = Document(translation_output)
        docx_replace(template_document, variables)
        template_document.save("exin_modified.docx")

        final_exin = translate_fill("exin_modified.docx", output_file, "english", source_language)
        unans_exin = [translate_paragraph(each, "english", source_language) for each in unanswered]
        return final_exin, unans_exin

    questions = extract_questions_for_info(document_name)
    variables, unanswered = extract_info(questions, context)
    print(variables)
    template_document = Document(document_name)
    docx_replace(template_document, variables)
    template_document.save(output_file)
    return output_file, unanswered
|
| 623 |
|
| 624 |
import docx
|
| 625 |
import random
|
|
|
|
| 742 |
output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
|
| 743 |
return output_file, highlighted_paras
|
| 744 |
|
|
|
|
| 745 |
import gradio as gr
|
| 746 |
|
| 747 |
+
analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template', 'Fill Contract Template by extracting information']
|
| 748 |
analysis_label = 'Select Contract Analysis Service'
|
| 749 |
analysis_choices = analysis_services
|
| 750 |
analysis_choice = ''
|
|
|
|
| 757 |
similar_label = 'Upload contract for Semantic Similar Clauses'
|
| 758 |
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
|
| 759 |
generate_questions_label = 'Upload template contract for Question Generation'
|
| 760 |
+
rc_file_label = 'Upload template contract with questions to fill'
|
| 761 |
+
rc_context_label = 'Enter the text to extract answer from'
|
| 762 |
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
|
| 763 |
button_label = "Upload and Analyze"
|
| 764 |
|
|
|
|
| 770 |
similar_text_label = 'A quick view of similar clauses'
|
| 771 |
qg_output_label = 'Download your template contract along with questions'
|
| 772 |
q_output_label = 'Download only questions to fill the template contract'
|
| 773 |
+
rc_output_label = 'Download your template contract along with filled answers'
|
| 774 |
+
rc_text_label = 'Unanswered Questions'
|
| 775 |
|
| 776 |
def change_analysis(choice):
|
| 777 |
global lang_choice, analysis_choices
|
|
|
|
| 792 |
return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
| 793 |
elif analysis_choice == analysis_choices[4]:
|
| 794 |
return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
| 795 |
+
elif analysis_choice == analysis_choices[5]:
|
| 796 |
+
return [gr.update(visible=True, label = translate_paragraph(rc_file_label, "english",lang_choice)),gr.update(visible=True, lines = 16, label = translate_paragraph(rc_context_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
|
| 797 |
+
|
| 798 |
def process_analysis(document_name, text, source_language, target_language, delimiter):
|
| 799 |
if analysis_choice == analysis_choices[0]:
|
| 800 |
translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
|
|
|
|
| 813 |
elif analysis_choice == analysis_choices[4]:
|
| 814 |
qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
|
| 815 |
return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
|
| 816 |
+
elif analysis_choice == analysis_choices[5]:
|
| 817 |
+
rc_file, rc_text = run_extract_info(document_name, text, "filled_contract.docx", source_language)
|
| 818 |
+
rc_text = "\n\n".join(rc_text)
|
| 819 |
+
return [gr.update(value = rc_file, visible=True, label = translate_paragraph(rc_output_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = rc_text, visible=True, label = translate_paragraph(rc_text_label, "english",lang_choice))]
|
| 820 |
|
| 821 |
|
| 822 |
with gr.Blocks() as demo:
|