Spaces:
Build error
Build error
Commit
·
8fc25ec
1
Parent(s):
dad3fe5
Create new file
Browse files
app.py
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 2 |
+
import torch
|
| 3 |
+
from mosestokenizer import *
|
| 4 |
+
from indicnlp.tokenize import sentence_tokenize
|
| 5 |
+
from docx import Document
|
| 6 |
+
|
| 7 |
+
# Translation backbone: Meta's NLLB-200 (distilled, 600M parameters).
# Loaded once at module import; `lang_dict` below supplies the FLORES-200
# language codes used as forced BOS tokens during generation.
trans_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
trans_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
# Run on GPU when available; all inputs are moved to the same device in
# translate_sentence().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
trans_model = trans_model.to(device)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# UI language name -> NLLB-200 FLORES-200 language code.
# The code is used both as a tokenizer source-language tag and as the
# forced BOS token when generating a translation.
lang_dict = {
    'english': 'eng_Latn',
    'assamese': 'asm_Beng',
    'awadhi': 'awa_Deva',
    'bengali': 'ben_Beng',
    'bhojpuri': 'bho_Deva',
    'gujarati': 'guj_Gujr',
    'hindi': 'hin_Deva',
    'kannada': 'kan_Knda',
    'kashmiri': 'kas_Deva',
    'maithili': 'mai_Deva',
    'malayalam': 'mal_Mlym',
    'marathi': 'mar_Deva',
    'odia': 'ory_Orya',
    'punjabi': 'pan_Guru',
    'sanskrit': 'san_Deva',
    'sindhi': 'snd_Arab',
    'tamil': 'tam_Taml',
    'telugu': 'tel_Telu',
    'urdu': 'urd_Arab'
}
|
| 35 |
+
|
| 36 |
+
def translate_sentence(article, target):
    """Translate one sentence (or short passage) into `target` with NLLB-200.

    Args:
        article: source-language text. Double quotes are stripped because
            they tend to degrade NLLB generations.
        target: full language name, a key of `lang_dict`.

    Returns:
        The decoded translation (first sequence of the batch).
    """
    inputs = trans_tokenizer(article.replace("\"", ""), return_tensors="pt").to(device)

    # `tokenizer.lang_code_to_id` was removed in recent transformers
    # releases; looking the code token up in the vocabulary is the stable
    # way to obtain the forced-BOS id.
    target_lang_id = trans_tokenizer.convert_tokens_to_ids(lang_dict[target])

    translated_tokens = trans_model.generate(
        **inputs, forced_bos_token_id=target_lang_id, max_length=100)

    return trans_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# UI language name -> 2-letter code accepted by indicnlp's sentence
# tokenizer. Languages absent here fall back to a naive "." split in
# split_sentences().
INDIC_DICT = {"assamese": "as", 'bengali': 'bn', 'gujarati': 'gu',
              'hindi': 'hi',
              'kannada': 'kn',
              'malayalam': 'ml',
              'marathi': 'mr',
              'odia': 'or',
              'punjabi': 'pa',
              'tamil': 'ta',
              'telugu': 'te'}
|
| 55 |
+
|
| 56 |
+
def split_sentences(paragraph, language):
    """Split a paragraph into sentences with a language-appropriate splitter.

    Args:
        paragraph: text to split.
        language: full language name as used by the UI (e.g. 'hindi',
            'english') — NOT an ISO code.

    Returns:
        List of sentence strings.
    """
    if language in INDIC_DICT:
        # indicnlp expects the 2-letter language code.
        return sentence_tokenize.sentence_split(paragraph, lang=INDIC_DICT[language])
    elif language == 'english':
        # BUG FIX: this branch previously compared against 'en', but every
        # caller passes full names ('english'), so the Moses splitter was
        # unreachable and English text fell through to the naive split.
        with MosesSentenceSplitter('en') as splitter:
            return splitter([paragraph])
    else:
        # Fallback for languages without a dedicated splitter.
        return paragraph.split(".")
|
| 64 |
+
|
| 65 |
+
def translate_paragraph(paragraph, source, target):
    """Translate a paragraph from `source` to `target` language.

    Short paragraphs (< 100 words) are translated in one shot; longer ones
    are split into sentences first so each fits the model's length budget,
    then the translations are re-joined with spaces.
    """
    # Identity translation: nothing to do.
    if source == target:
        return paragraph

    # Fits in a single model call.
    if len(paragraph.split()) < 100:
        return translate_sentence(paragraph, target)

    # Too long: translate sentence by sentence and stitch back together.
    pieces = [
        translate_sentence(sentence, target)
        for sentence in split_sentences(paragraph, source)
    ]
    return " ".join(pieces)
|
| 76 |
+
|
| 77 |
+
def docx_replace(doc, data):
    """Replace text in a python-docx document while preserving run styling.

    Args:
        doc: an open `docx.Document`.
        data: mapping of {text-to-find: replacement}. Keys are matched
            against paragraph text in both the body and all table cells.

    The replacement text may be split across several runs (python-docx
    splits text wherever formatting changes), so this walks the runs,
    records which run slices make up the match, then rewrites the first
    run with the replacement and blanks the rest.
    """
    # Collect every paragraph: document body plus all table cells.
    paragraphs = list(doc.paragraphs)
    for t in doc.tables:
        for row in t.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    paragraphs.append(paragraph)

    for key, val in data.items():
        for p in paragraphs:
            # key_name = '${{{}}}'.format(key)  # placeholder-style keys (unused)
            key_name = key
            if key_name in p.text:
                # print(f'old one {p.text}')
                inline = p.runs
                # Replace strings and retain the same style.
                # The text to be replaced can be split over several runs so
                # search through, identify which runs need to have text replaced
                # then replace the text in those identified
                started = False
                key_index = 0
                # found_runs is a list of (inline index, index of match, length of match)
                found_runs = list()
                found_all = False
                replace_done = False
                for i in range(len(inline)):

                    # case 1: found in single run so short circuit the replace
                    if key_name in inline[i].text and not started:
                        found_runs.append((i, inline[i].text.find(key_name), len(key_name)))
                        text = inline[i].text.replace(key_name, str(val))
                        inline[i].text = text
                        replace_done = True
                        found_all = True
                        break

                    if key_name[key_index] not in inline[i].text and not started:
                        # keep looking ...
                        continue

                    # case 2: search for partial text, find first run
                    if key_name[key_index] in inline[i].text and inline[i].text[-1] in key_name and not started:
                        # check sequence
                        start_index = inline[i].text.find(key_name[key_index])
                        check_length = len(inline[i].text)
                        for text_index in range(start_index, check_length):
                            if inline[i].text[text_index] != key_name[key_index]:
                                # no match so must be false positive
                                break
                        if key_index == 0:
                            started = True
                        chars_found = check_length - start_index
                        key_index += chars_found
                        found_runs.append((i, start_index, chars_found))
                        if key_index != len(key_name):
                            continue
                        else:
                            # found all chars in key_name
                            found_all = True
                            break

                    # case 2: search for partial text, find subsequent run
                    if key_name[key_index] in inline[i].text and started and not found_all:
                        # check sequence
                        chars_found = 0
                        check_length = len(inline[i].text)
                        for text_index in range(0, check_length):
                            if inline[i].text[text_index] == key_name[key_index]:
                                key_index += 1
                                chars_found += 1
                            else:
                                break
                        # no match so must be end
                        found_runs.append((i, 0, chars_found))
                        if key_index == len(key_name):
                            found_all = True
                            break

                if found_all and not replace_done:
                    # First matched run gets the replacement value; the
                    # remaining matched slices are blanked out so the text
                    # is not duplicated.
                    for i, item in enumerate(found_runs):
                        index, start, length = [t for t in item]
                        if i == 0:
                            text = inline[index].text.replace(inline[index].text[start:start + length], str(val))
                            inline[index].text = text
                        else:
                            text = inline[index].text.replace(inline[index].text[start:start + length], '')
                            inline[index].text = text
                # print(p.text)
                # NOTE(review): stops after the first paragraph containing the
                # key — later occurrences of the same key are left untouched.
                break
|
| 166 |
+
|
| 167 |
+
# Demo shortcut table: known sample contracts (matched by their first
# paragraph) map to pre-translated output files, avoiding live inference.
input_output_trans = {"NON-DISCLOSURE-AGREEMENT": {"telugu": "translation_telugu.docx", "hindi": "translation_english.docx"}, "dummy.docx": {"telugu": "translation_telugu.docx", "hindi": "translation_english.docx"}}
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def translate_fill(document_name, output_file, src, trg):
    """Translate a .docx contract from `src` to `trg`, preserving styling.

    Known demo documents (recognised by their first paragraph) return a
    canned pre-translated file after a short delay; everything else is
    translated paragraph-by-paragraph and rewritten in place via
    docx_replace so run formatting survives.
    """
    print("translate doc")

    # Demo shortcut: serve a canned translation for the sample contracts.
    probe = docx.Document(document_name)
    first_text = probe.paragraphs[0].text
    if first_text in list(input_output_trans.keys()):
        lang_doc_dict = input_output_trans[first_text]
        if trg in lang_doc_dict.keys():
            time.sleep(5)
            return lang_doc_dict[trg]

    template_document = Document(document_name)

    # Build {original paragraph text: translated text} for the body ...
    variables = {}
    for paragraph in template_document.paragraphs:
        text = paragraph.text
        if text.strip() != "":
            variables[text] = translate_paragraph(text, src, trg)

    # ... and for every table cell.
    for table in template_document.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    text = paragraph.text
                    if text.strip() != "":
                        variables[text] = translate_paragraph(text, src, trg)

    # Swap the originals for their translations, keeping styles intact.
    docx_replace(template_document, variables)
    template_document.save(output_file)
    return output_file
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def translate_txt(document_name, output_file, src, trg):
    """Translate a plain-text file line by line.

    Blank lines are preserved as-is; every other line is translated from
    `src` to `trg`. Returns the output file path.
    """
    print("translate text")

    with open(document_name) as source_fp:
        stripped_lines = [raw.rstrip() for raw in source_fp.readlines()]

    with open(output_file, 'w') as sink:
        for line in stripped_lines:
            if line != "":
                sink.write(translate_paragraph(line, src, trg) + "\n")
            else:
                # Keep paragraph breaks intact.
                sink.write("\n")

    return output_file
|
| 215 |
+
|
| 216 |
+
import torch
|
| 217 |
+
import time
|
| 218 |
+
import json
|
| 219 |
+
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
|
| 220 |
+
|
| 221 |
+
from transformers import (
|
| 222 |
+
AutoConfig,
|
| 223 |
+
AutoModelForQuestionAnswering,
|
| 224 |
+
AutoTokenizer,
|
| 225 |
+
squad_convert_examples_to_features
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
from transformers.data.processors.squad import SquadResult, SquadV2Processor, SquadExample
|
| 229 |
+
from transformers.data.metrics.squad_metrics import compute_predictions_logits
|
| 230 |
+
|
| 231 |
+
# Key-clause extraction model: RoBERTa-base fine-tuned on the CUAD
# (Contract Understanding Atticus Dataset) QA task, loaded from a local
# checkpoint directory.
info_model_path = 'cuad-models/roberta-base/'
info_config_class, info_model_class, info_tokenizer_class = (
    AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer)
info_config = info_config_class.from_pretrained(info_model_path)
# use_fast=False: the slow tokenizer is required by
# squad_convert_examples_to_features below.
info_tokenizer = info_tokenizer_class.from_pretrained(
    info_model_path, do_lower_case=True, use_fast=False)
info_model = info_model_class.from_pretrained(info_model_path, config=info_config)

# NOTE(review): re-binds the module-level `device` already set above — same
# value, harmless but redundant.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
info_model.to(device)
|
| 241 |
+
|
| 242 |
+
def run_prediction(question_texts, context_text):
    """Run extractive QA over one contract for a batch of CUAD questions.

    Args:
        question_texts: list of question strings.
        context_text: the full contract text (single context shared by all
            questions).

    Returns:
        The predictions mapping produced by `compute_predictions_logits`
        (qas_id -> best answer string; '' when the model predicts
        "no answer").
    """
    ### Setting hyperparameters
    max_seq_length = 512
    doc_stride = 256          # overlap window for long contexts
    n_best_size = 1           # keep only the single best span
    max_query_length = 64
    max_answer_length = 512
    do_lower_case = False
    null_score_diff_threshold = 0.0

    # model_name_or_path = "../cuad-models/roberta-base/"

    def to_list(tensor):
        # Move a tensor off the GPU and to a plain Python list.
        return tensor.detach().cpu().tolist()

    processor = SquadV2Processor()
    examples = []

    # Wrap each question as a SQuAD-style example sharing the same context.
    for i, question_text in enumerate(question_texts):
        example = SquadExample(
            qas_id=str(i),
            question_text=question_text,
            context_text=context_text,
            answer_text=None,
            start_position_character=None,
            title="Predict",
            answers=None,
        )

        examples.append(example)

    features, dataset = squad_convert_examples_to_features(
        examples=examples,
        tokenizer=info_tokenizer,
        max_seq_length=max_seq_length,
        doc_stride=doc_stride,
        max_query_length=max_query_length,
        is_training=False,
        return_dataset="pt",
        threads=1,
    )

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=10)

    all_results = []

    for batch in eval_dataloader:
        info_model.eval()
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            # batch[3] maps each feature back to its source example.
            example_indices = batch[3]

            outputs = info_model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)

                # outputs.to_tuple() -> (start_logits, end_logits)
                output = [to_list(output[i]) for output in outputs.to_tuple()]

                start_logits, end_logits = output
                result = SquadResult(unique_id, start_logits, end_logits)
                all_results.append(result)

    # Aggregate per-feature logits into per-question answer spans.
    final_predictions = compute_predictions_logits(
        all_examples=examples,
        all_features=features,
        all_results=all_results,
        n_best_size=n_best_size,
        max_answer_length=max_answer_length,
        do_lower_case=do_lower_case,
        output_prediction_file=None,
        output_nbest_file=None,
        output_null_log_odds_file=None,
        verbose_logging=False,
        version_2_with_negative=True,
        null_score_diff_threshold=null_score_diff_threshold,
        tokenizer=info_tokenizer
    )

    return final_predictions
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def run_contract_extraction(document_name, output_file):
    """Extract key clauses from a contract using the CUAD question set.

    Reads the contract paragraphs from `document_name` (.docx), asks every
    question from the bundled CUADv1 dataset against the full text, and
    writes the non-empty answers to `output_file` as numbered Q/A pairs.

    Returns:
        The path of the written output file.
    """
    template_document = Document(document_name)
    contract = []
    for paragraph in template_document.paragraphs:
        if(paragraph.text.strip()!=''):
            contract.append(paragraph.text)

    contract = "\n".join(contract)
    questions = []

    # The CUAD dataset ships a fixed battery of clause-extraction questions;
    # reuse them verbatim.
    with open('./cuad-data/CUADv1.json') as json_file:
        data = json.load(json_file)

    #with open('./cuad-data/questions.txt', 'w') as questions_file:
    for i, q in enumerate(data['data'][0]['paragraphs'][0]['qas']):
        question = data['data'][0]['paragraphs'][0]['qas'][i]['question']
        questions.append(question)

    predictions = run_prediction(questions, contract)

    # Only questions with a non-empty predicted answer are written out;
    # `p` is the qas_id (a stringified index into `questions`).
    with open(output_file, 'w') as f:
        count = 1
        for i, p in enumerate(predictions):
            if(predictions[p]!=''):
                #print(f"Question {i+1}: {questions[int(p)]}\nPredicted Answer: {predictions[p]}\n\n")
                f.write("Question "+str(count)+": "+ questions[int(p)] +"\nPredicted Answer: "+ predictions[p]+ "\n\n")
                count += 1

    return output_file
|
| 362 |
+
|
| 363 |
+
# Demo shortcut table for key-clause extraction: sample contracts map to
# pre-computed answer files.
input_output_key = {"NON-DISCLOSURE-AGREEMENT": "qsns_english.txt", "dummy.docx": "qsns_telugu.txt"}
|
| 364 |
+
|
| 365 |
+
def run_key_clause(document_name, output_name, source_language):
    """Extract key clauses, translating around English-only models as needed.

    Demo documents return a canned answer file. Non-English contracts are
    translated to English, run through the CUAD extractor, and the answers
    are translated back to the source language.
    """
    doc = docx.Document(document_name)
    first_text = doc.paragraphs[0].text

    # Demo shortcut: recognised sample contract -> canned results.
    if first_text in list(input_output_key.keys()):
        time.sleep(5)
        return input_output_key[first_text]

    if source_language == 'english':
        # Extractor works natively on English.
        return run_contract_extraction(document_name, output_name)

    # Pivot through English: translate in, extract, translate out.
    translation_output = translate_fill(
        document_name, "info_translation.docx", source_language, "english")
    info_output = run_contract_extraction(translation_output, "info_english.txt")
    return translate_txt(info_output, output_name, "english", source_language)
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
from transformers import AutoModelWithLMHead, AutoTokenizer
|
| 383 |
+
from docx import Document
|
| 384 |
+
|
| 385 |
+
# Question-generation model: T5-base fine-tuned for answer-aware question
# generation.
qg_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# FIX: AutoModelWithLMHead is deprecated (and removed in newer transformers
# releases); AutoModelForSeq2SeqLM is the correct auto-class for T5 and is
# already imported at the top of this file.
qg_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
qg_model.to(device)
|
| 389 |
+
|
| 390 |
+
def get_question(answer, context, max_length=64):
    """Generate a question whose answer is `answer`, given `context`.

    Uses the answer-aware T5 QG model; the raw decoded string still
    contains the '<pad> question:' prefix and '</s>' suffix, which callers
    strip themselves.
    """
    # T5 QG prompt format expected by this checkpoint.
    input_text = "answer: %s context: %s </s>" % (answer, context)
    features = qg_tokenizer([input_text], return_tensors='pt').to(device)

    generated = qg_model.generate(
        input_ids=features['input_ids'],
        attention_mask=features['attention_mask'],
        max_length=max_length,
    )

    return qg_tokenizer.decode(generated[0])
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def run_fill_questions(document_name, output_file, questions_file, delimiter):
    """Turn template blanks into generated questions.

    Scans a template .docx for `delimiter` placeholders, replaces each with
    a unique id, generates a question for each id from the surrounding
    context, and rewrites the document with '{{question}}' markers in place
    of the blanks. Also writes the questions to `questions_file`.

    Returns:
        (output_file, questions_file)
    """
    print("QGenerations")
    prev_para = ''
    count = 0          # running counter so every placeholder id is unique
    variables = {}     # {original paragraph text: rewritten text}
    questions = []

    doc = Document(document_name)

    for paragraph in doc.paragraphs:
        if(paragraph.text.strip()==''):
            continue
        if(paragraph.text.count(delimiter)>0):
            var_count = paragraph.text.count(delimiter)
            # Replace each delimiter with a unique synthetic answer token
            # ('id0', 'id1', ...) the QG model can anchor on.
            format_str = paragraph.text.replace(delimiter, '{}')
            new_string = format_str.format(*('id'+str(i) for i in range(count,count+var_count)))

            answers = ['id'+str(i) for i in range(count,count+var_count)]

            # Very short paragraphs carry too little context on their own;
            # prepend the previous paragraph.
            if (len(new_string.split())<10):
                context = prev_para + " " + new_string
            else:
                context = new_string

            # Generate one question per placeholder and splice it back in
            # as a {{...}} marker.
            for answer in answers:
                question_string = get_question(answer, context).replace('<pad> question:','').replace('</s>','').strip()
                question = "{{"+question_string+"}}"
                questions.append(question_string)
                new_string = new_string.replace(answer, question)

            count += var_count
            variables[paragraph.text] = new_string

        prev_para = paragraph.text

    with open(questions_file, 'w') as f:
        count = 1
        for p in questions:
            f.write("Question "+str(count)+": "+ p +"\n")
            count += 1

    # Rewrite the document in place, preserving run styling.
    docx_replace(doc, variables)
    doc.save(output_file)
    return output_file, questions_file
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
def extract_questions(document_name, output_file):
    """Collect every '{{...}}' question marker from a .docx into a text file.

    Writes the markers (without braces) to `output_file` as a numbered
    'Question N: ...' list and returns the output path.
    """
    doc = Document(document_name)

    found = []
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            continue
        found.extend(re.findall(r'\{{(.*?)\}}', text))

    with open(output_file, 'w') as sink:
        for number, question in enumerate(found, start=1):
            sink.write("Question " + str(number) + ": " + question + "\n")

    return output_file
|
| 468 |
+
|
| 469 |
+
# Demo shortcut table for question generation: sample templates map to
# pre-built question documents.
input_output_qg = {"NON-DISCLOSURE-AGREEMENT": "qsns_template_english.docx", "dummy.docx": "output.docx"}
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
def run_generate_questions(document_name, output_file, questions_file, delimiter, source_language):
    """Generate template-filling questions, pivoting through English if needed.

    Returns (questions-annotated document path, questions-list file path).
    """
    doc = docx.Document(document_name)
    first_text = doc.paragraphs[0].text

    # Demo shortcut: recognised sample template -> canned output.
    if first_text in list(input_output_qg.keys()):
        qg_output = input_output_qg[first_text]
        q_output = extract_questions(qg_output, questions_file)
        time.sleep(5)
        return qg_output, q_output

    if source_language == 'english':
        # QG model is English-only but the input already is.
        return run_fill_questions(document_name, output_file, questions_file, delimiter)

    # Translate template to English, generate questions, translate results
    # back to the source language.
    translation_output = translate_fill(
        document_name, "qg_translation.docx", source_language, "english")
    qg_output, q_output = run_fill_questions(
        translation_output, output_file, 'qsns_english.txt', delimiter)
    final_qg = translate_fill(qg_output, output_file, "english", source_language)
    final_q = translate_txt(q_output, questions_file, "english", source_language)
    return final_qg, final_q
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
import docx
|
| 491 |
+
import random
|
| 492 |
+
from docx.shared import RGBColor
|
| 493 |
+
import time
|
| 494 |
+
import re
|
| 495 |
+
|
| 496 |
+
# Demo shortcut table for red-flag identification: sample contracts map to
# pre-highlighted documents.
input_output_red = {"NON-DISCLOSURE-AGREEMENT": "output.docx", "dummy.docx": "dummy_colored.docx"}
|
| 497 |
+
|
| 498 |
+
def run_redflags(filename, output_file):
    """Colour 'red flag' paragraphs red and save a copy of the document.

    NOTE(review): outside the demo shortcut, the selection is a mock — long,
    indented paragraphs are flagged with 50% probability via random(), and
    the sleep simulates processing time. This is placeholder behaviour, not
    a real classifier.
    """
    print("Red flags")
    doc = docx.Document(filename)
    # Demo shortcut: recognised sample contract -> canned highlighted file.
    if doc.paragraphs[0].text in list(input_output_red.keys()):
        return input_output_red[doc.paragraphs[0].text]
    else:
        for para in doc.paragraphs:
            inline = para.runs
            colour = False
            # Heuristic mock: long (>20 words), indented paragraphs, coin flip.
            if (len(para.text.split())>20) and random.random()>0.5 and para.paragraph_format.left_indent!=None:
                colour = True
            if colour:
                for i in range(len(inline)):
                    inline[i].font.color.rgb = RGBColor(255, 000, 000)

        time.sleep(8)
        doc.save(output_file)
        return output_file
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
import docx
|
| 519 |
+
import random
|
| 520 |
+
from docx.shared import RGBColor
|
| 521 |
+
import time
|
| 522 |
+
import re
|
| 523 |
+
from docx import Document
|
| 524 |
+
|
| 525 |
+
from docx.enum.text import WD_COLOR_INDEX
|
| 526 |
+
|
| 527 |
+
from transformers import AutoTokenizer, AutoModel
|
| 528 |
+
import torch
|
| 529 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 530 |
+
import numpy as np
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
# Sentence-similarity encoder: ai4bharat/indic-bert, used to embed contract
# sentences and user-supplied clauses for semantic clause search.
similar_tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')
similar_model = AutoModel.from_pretrained('ai4bharat/indic-bert')
# Inference only — disable dropout etc.
similar_model.eval()
|
| 536 |
+
|
| 537 |
+
def obtain_rep(documents):
    """Embed each text in `documents` with indic-bert.

    Each text is tokenized to a fixed length of 128 (truncated/padded) and
    encoded without gradient tracking; the model's pooler output is used as
    the sentence representation.

    Returns:
        Tensor of shape (len(documents), hidden_dim).
    """
    pooled = []
    with torch.no_grad():
        for text in documents:
            encoded = similar_tokenizer.encode_plus(
                text,
                max_length=128,
                truncation=True,
                padding='max_length',
                return_tensors='pt',
            )
            # Forward only input_ids / attention_mask (token_type_ids are
            # deliberately not passed), re-batched as single-item tensors.
            batch = {
                'input_ids': torch.stack([encoded['input_ids'][0]]),
                'attention_mask': torch.stack([encoded['attention_mask'][0]]),
            }
            pooled.append(similar_model(**batch).pooler_output)

    return torch.stack(pooled).squeeze(1)
|
| 557 |
+
|
| 558 |
+
def similarity(documents, clauses):
    """Per-document maximum cosine similarity against a set of clauses.

    Args:
        documents: torch tensor of shape (n_docs, dim) — sentence embeddings.
        clauses: torch tensor of shape (n_clauses, dim) — clause embeddings.

    Returns:
        numpy array of shape (n_docs,): for each document embedding, the
        highest cosine similarity achieved by any clause embedding.
    """
    clauses = clauses.detach().numpy()
    documents = documents.detach().numpy()
    # Cosine similarity as a normalised dot product, computed with plain
    # numpy (drops the sklearn dependency for this one call). The epsilon
    # guards against division by zero for all-zero embeddings.
    eps = 1e-12
    clause_norm = clauses / np.maximum(
        np.linalg.norm(clauses, axis=1, keepdims=True), eps)
    doc_norm = documents / np.maximum(
        np.linalg.norm(documents, axis=1, keepdims=True), eps)
    sim = clause_norm @ doc_norm.T  # shape (n_clauses, n_docs)
    return np.max(sim, axis=0)
|
| 564 |
+
|
| 565 |
+
def fill_yellow(filename, output_file, highlighted_paras):
    """Highlight in yellow the first paragraph containing each snippet.

    Args:
        filename: source .docx path.
        output_file: destination .docx path.
        highlighted_paras: list of text snippets to locate.

    Returns:
        The output file path.
    """
    doc = docx.Document(filename)
    for snippet in highlighted_paras:
        for para in doc.paragraphs:
            if snippet not in para.text:
                continue
            # Highlight every run of the matching paragraph.
            for run in para.runs:
                run.font.highlight_color = WD_COLOR_INDEX.YELLOW
            # Only the first matching paragraph per snippet is highlighted.
            break
    doc.save(output_file)
    return output_file
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
def get_similar_clauses(filename, output_file, clauses, source_language):
    """Find contract sentences semantically similar to the given clauses.

    Splits the contract into sentences, embeds sentences and clauses with
    indic-bert, scores each sentence by its best cosine similarity to any
    clause, highlights the top ~10% (minimum 3) in yellow, and returns the
    highlighted document path plus the highlighted paragraph texts.
    """
    paras = []
    template_document = Document(filename)
    contract = []
    for paragraph in template_document.paragraphs:
        if(paragraph.text.strip()!=''):
            contract.append(paragraph.text)

    sentence_batch = []

    for paragraph in contract:
        sentence_batch.extend(split_sentences(paragraph, source_language))

    # Drop whitespace-only fragments and very short sentences (<= 5 words).
    sentence_batch = [each for each in sentence_batch if each!=' ' and len(each.split())>5]

    doc_rep = obtain_rep(sentence_batch)
    clause_rep = obtain_rep(clauses)
    k = similarity(doc_rep, clause_rep)
    # Keep the top 10% of sentences, but never fewer than 3.
    pick_top = max(int(0.1*len(sentence_batch)),3)
    ind = k.argsort()[-pick_top:][::-1]
    for each_idx in ind:
        paras.append(sentence_batch[each_idx])

    output_file = fill_yellow(filename, output_file, paras)
    # Re-read what actually got highlighted (paragraph-level, not
    # sentence-level) so the UI preview matches the document.
    highlighted_paras = get_highlighted_clauses(output_file)
    return output_file, highlighted_paras
|
| 607 |
+
|
| 608 |
+
|
| 609 |
+
# Demo shortcut table for similar-clause search: sample contracts map to a
# list of {clauses, file} entries; a query clause matching an entry's
# clause list returns that entry's pre-highlighted file.
input_output_similar = {"NON-DISCLOSURE-AGREEMENT": [{"clauses": ["hi"], "file": "output_similar.docx"}, {"clauses": ["bye", "see you"], "file": "output.docx"}], "dummy.docx": [{"clauses": ["lets see", "whatever"], "file": "dummy_colored.docx"}]}
|
| 610 |
+
def get_highlighted_clauses(filename):
    """Return the text of every paragraph containing a yellow-highlighted run."""
    doc = docx.Document(filename)
    highlighted = []
    for para in doc.paragraphs:
        # A paragraph counts as highlighted if any of its runs is yellow.
        is_yellow = any(
            run.font.highlight_color == WD_COLOR_INDEX.YELLOW
            for run in para.runs
        )
        if is_yellow:
            highlighted.append(para.text)
    return highlighted
|
| 623 |
+
|
| 624 |
+
def run_similar_clause(filename, output_file, clauses, source_language):
    """Highlight contract clauses semantically similar to `clauses`.

    Demo documents with a matching canned clause set return a pre-built
    highlighted file; everything else runs the real similarity pipeline.

    Returns:
        (highlighted document path, list of highlighted paragraph texts)

    BUG FIX: the original looped over the shortcut table with an `else`
    that fired on the FIRST non-matching key, so later keys were never
    checked; and a matching key with no clause overlap fell through and
    implicitly returned None. Now all keys are scanned and any miss falls
    back to the real computation.
    """
    print("similar clause")
    doc = docx.Document(filename)
    first_text = doc.paragraphs[0].text

    # Demo shortcut: scan every known sample document for a clause match.
    for doc_input in list(input_output_similar.keys()):
        if first_text in doc_input:
            for entry in input_output_similar[doc_input]:
                if len(list(set(entry["clauses"]).intersection(set(clauses)))) > 0:
                    output_file = entry["file"]
                    time.sleep(3)
                    highlighted_paras = get_highlighted_clauses(output_file)
                    return output_file, highlighted_paras

    # No canned match: run the real semantic-similarity pipeline.
    return get_similar_clauses(filename, output_file, clauses, source_language)
|
| 638 |
+
|
| 639 |
+
|
| 640 |
+
import gradio as gr
|
| 641 |
+
|
| 642 |
+
# ---- Gradio UI state and label strings ------------------------------------
# Canonical (English) service names; `analysis_choices` holds the currently
# displayed (possibly translated) versions.
analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template']
analysis_label = 'Select Contract Analysis Service'
analysis_choices = analysis_services
# Mutable UI state, updated by the callbacks below.
analysis_choice = ''
lang_choice = 'english'
# Input-widget labels (translated on the fly into `lang_choice`).
translation_label = 'Upload contract for Translation'
translation_src_label = 'Select language of uploaded contract'
translation_tgt_label = 'Select language to translate'
keyclause_label = 'Upload contract for Key Clause Extraction'
redflag_label = 'Upload contract for Red Flag Identification'
similar_label = 'Upload contract for Semantic Similar Clauses'
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
generate_questions_label = 'Upload template contract for Question Generation'
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
button_label = "Upload and Analyze"


# Output-widget labels.
translation_output_label = 'Download your translated contract'
keyclause_output_label = 'Download your key clauses from the contract'
redflag_output_label = 'Download your contract with red flags highlighted'
similar_file_label = 'Download your contract with highlighted similar clauses in yellow'
similar_text_label = 'A quick view of similar clauses'
qg_output_label = 'Download your template contract along with questions'
q_output_label = 'Download only questions to fill the template contract'
|
| 666 |
+
|
| 667 |
+
def change_analysis(choice):
    """React to a UI-language change.

    Re-translates the service names into the newly selected language and
    returns nine ``gr.update`` payloads for
    [analysis_radio, input_file, input_text, output_file, output_file2,
    output_text, translation_target, translation_source, delimiter]:
    the radio gets the translated choices/label, everything else is hidden.
    """
    global lang_choice, analysis_choices
    lang_choice = choice
    translated = []
    for service in analysis_services:
        translated.append(translate_paragraph(service, "english", choice))
    analysis_choices = translated
    radio_update = gr.update(
        choices=analysis_choices,
        label=translate_paragraph(analysis_label, "english", choice),
    )
    # Hide the eight remaining widgets until a service is picked again.
    hidden_updates = [gr.update(visible=False) for _ in range(8)]
    return [radio_update] + hidden_updates
def change_inputs(choice):
    """React to a service selection: show and relabel only the widgets it needs.

    Returns nine ``gr.update`` payloads for
    [input_file, input_text, output_file, output_file2, output_text,
    translation_target, translation_source, delimiter, button].

    Fix: previously an unmatched ``choice`` (e.g. a stale radio value right
    after a language switch re-translated ``analysis_choices``) fell off the
    end of the if/elif chain and implicitly returned ``None``, crashing the
    Gradio callback that expects nine outputs. Now we fall back to nine no-op
    updates that leave the UI unchanged.
    """
    global analysis_choice
    analysis_choice = choice
    if analysis_choice == analysis_choices[0]:
        # Translate Contract: file upload + source and target language dropdowns.
        return [gr.update(visible=True, label = translate_paragraph(translation_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True, label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_tgt_label, "english",lang_choice)),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[1]:
        # Key Clause Extraction: file upload + source language only.
        return [gr.update(visible=True, label = translate_paragraph(keyclause_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[2]:
        # Red Flag Identification: file upload + source language only.
        return [gr.update(visible=True, label = translate_paragraph(redflag_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[3]:
        # Similar Clause search: file upload + free-text clauses + source language.
        return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[4]:
        # Question Generation: file upload + placeholder/delimiter textbox + source language.
        return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    # Unknown/stale choice: change nothing rather than returning None.
    return [gr.update() for _ in range(9)]
def process_analysis(document_name, text, source_language, target_language, delimiter):
    """Run the currently selected analysis service on the uploaded contract.

    Args:
        document_name: uploaded contract file (value of the input gr.File).
        text: free-text input; one clause per line for similar-clause search.
        source_language: language of the uploaded contract.
        target_language: language to translate into (translation service only).
        delimiter: placeholder pattern marking blanks in a template contract
            (question-generation service only).

    Returns three ``gr.update`` payloads for
    [output_file, output_file2, output_text].

    Fix: previously an ``analysis_choice`` matching no branch (e.g. stale state
    after a language switch) implicitly returned ``None``, crashing the Gradio
    callback that expects three outputs. Now we fall back to three no-op updates.
    """
    if analysis_choice == analysis_choices[0]:
        # Translation: produce a translated .docx named after the target language.
        translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
        return [gr.update(value = translation_output , visible=True, label = translate_paragraph(translation_output_label, "english", target_language)),gr.update(visible=False),gr.update(visible=False)]
    elif analysis_choice == analysis_choices[1]:
        # Key clause extraction: plain-text report of key clauses.
        info_output = run_key_clause(document_name, "key_clauses.txt",source_language)
        return [gr.update(value = info_output, visible=True, label = translate_paragraph(keyclause_output_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[2]:
        # Red flag identification: contract with red flags highlighted.
        red_flag_output = run_redflags(document_name, "redflag.docx")
        return [gr.update(value = red_flag_output,visible=True, label = translate_paragraph(redflag_output_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[3]:
        # Similar clause search: one query clause per input line.
        clauses = text.split("\n")
        similar_file, similar_text = run_similar_clause(document_name, "similar.docx", clauses, source_language)
        similar_text = "\n\n\n".join(similar_text)
        return [gr.update(value = similar_file, visible=True, label = translate_paragraph(similar_file_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = similar_text, visible=True, label = translate_paragraph(similar_text_label, "english",lang_choice))]
    elif analysis_choice == analysis_choices[4]:
        # Question generation: annotated template plus a questions-only file.
        qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
        return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value = q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
    # Unknown/stale service selection: change nothing rather than returning None.
    return [gr.update() for _ in range(3)]
with gr.Blocks() as demo:
    # Top-level selectors: UI language, then which analysis service to run.
    lang_radio = gr.Radio(list(lang_dict.keys()), value = 'english', label="Select your language")
    analysis_radio = gr.Radio(analysis_services , label=analysis_label)

    with gr.Row():
        # Contract upload; hidden until a service is chosen (see change_inputs).
        input_file = gr.File(interactive = True, visible = False)
        with gr.Column():
            # Per-service options: language dropdowns for translation,
            # placeholder pattern for question generation.
            translation_source = gr.Dropdown(choices = list(lang_dict.keys()),interactive = True, value = 'english', label=translation_src_label, visible=False)
            translation_target = gr.Dropdown(choices = list(lang_dict.keys()),interactive = True, value = 'english', label=translation_tgt_label, visible=False)
            delimiter = gr.Textbox(label= delimiter_label, lines=1, interactive = True, visible = False)

    # Free-text input (clauses for similar-clause search).
    input_text = gr.Textbox(lines=4, interactive = True, visible = False)

    button = gr.Button(value = button_label , visible = False)
    # Result widgets; which ones become visible depends on the service run.
    output_file = gr.File(interactive = False, visible = False)
    output_file2 = gr.File(interactive = False, visible = False)
    output_text = gr.Textbox(interactive = False, visible = False)

    # Language change re-translates labels and hides everything (9 outputs);
    # service change shows/relabels the widgets that service needs (9 outputs);
    # the button runs the analysis and fills the outputs (3 outputs).
    # NOTE(review): output order here must match the update lists returned by
    # change_analysis / change_inputs / process_analysis.
    lang_radio.change(fn=change_analysis, inputs=lang_radio, outputs=[analysis_radio,input_file, input_text, output_file,output_file2, output_text,translation_target,translation_source, delimiter])
    analysis_radio.change(fn=change_inputs, inputs=analysis_radio, outputs=[input_file, input_text, output_file, output_file2, output_text,translation_target, translation_source, delimiter, button])
    button.click( process_analysis, [input_file,input_text, translation_source, translation_target, delimiter], [output_file, output_file2, output_text])

demo.launch(debug=True)