Spaces:

acecalisto3
/

urld

Running

App Files Files Community

acecalisto3 committed on Mar 28

Commit

c00eec9

verified ·

1 Parent(s): 4340847

Update app2.py

Browse files

Files changed (1) hide show

app2.py +145 -532

app2.py CHANGED Viewed

@@ -1,19 +1,15 @@
 import gradio as gr
-#import urllib.request
 import requests
 import zipfile
 import uuid
 import bs4
 import lxml
 import os
-#import subprocess
-from huggingface_hub import InferenceClient,HfApi
 import random
 import json
 import datetime
 from pypdf import PdfReader
-import uuid
-#from query import tasks
 from agent import (
     PREFIX,
     COMPRESS_DATA_PROMPT,
@@ -21,13 +17,22 @@ from agent import (
     LOG_PROMPT,
     LOG_RESPONSE,
 )
-client = InferenceClient(
-    "mistralai/Mixtral-8x7B-Instruct-v0.1"
-)
-reponame="acecalisto3/tmp"
-save_data=f'https://huggingface.co/datasets/{reponame}/raw/main/'
-token_self = os.environ['HF_TOKEN']
-api=HfApi(token=token_self)
 def find_all(purpose, task, history, url, result, steps):
     return_list = []
@@ -56,83 +61,43 @@ def find_all(purpose, task, history, url, result, steps):
     return True, return_list
 def read_txt(txt_path):
-    text=""
-    with open(txt_path,"r") as f:
         text = f.read()
-    f.close()
-    print (text)
     return text
 def read_pdf(pdf_path):
-    text=""
-    reader = PdfReader(f'{pdf_path}')
-    number_of_pages = len(reader.pages)
-    for i in range(number_of_pages):
-        page = reader.pages[i]
         text = f'{text}\n{page.extract_text()}'
-    print (text)
     return text
-error_box=[]
 def read_pdf_online(url):
-    uid=uuid.uuid4()
     print(f"reading {url}")
     response = requests.get(url, stream=True)
-    print(response.status_code)
-    text=""
-#################
-#####################
-    try:
-        if response.status_code == 200:
-            with open("test.pdf", "wb") as f:
-                f.write(response.content)
-            #f.close()
-            #out = Path("./data.pdf")
-            #print (out)
-            reader = PdfReader("test.pdf")
-            number_of_pages = len(reader.pages)
-            print(number_of_pages)
-            for i in range(number_of_pages):
-                page = reader.pages[i]
-                text = f'{text}\n{page.extract_text()}'
-                print(f"PDF_TEXT:: {text}")
-            return text
-        else:
-            text = response.status_code
-            error_box.append(url)
-            print(text)
-            return text
-    except Exception as e:
-        print (e)
-        return e
-VERBOSE = True
-MAX_HISTORY = 100
-MAX_DATA = 20000
 def format_prompt(message, history):
-  prompt = "<s>"
-  for user_prompt, bot_response in history:
-    prompt += f"[INST] {user_prompt} [/INST]"
-    prompt += f" {bot_response}</s> "
-  prompt += f"[INST] {message} [/INST]"
-  return prompt
-def run_gpt(
-    prompt_template,
-    stop_tokens,
-    max_tokens,
-    seed,
-    **prompt_kwargs,
-):
-    print(seed)
-    timestamp=datetime.datetime.now()
     generate_kwargs = dict(
         temperature=0.9,
@@ -147,48 +112,30 @@ def run_gpt(
         timestamp=timestamp,
         purpose="Compile the provided data and complete the users task"
     ) + prompt_template.format(**prompt_kwargs)
     if VERBOSE:
         print(LOG_PROMPT.format(content))
-    #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    #formatted_prompt = format_prompt(f'{content}', history)
     stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
     resp = ""
     for response in stream:
         resp += response.token.text
-        #yield resp
     if VERBOSE:
         print(LOG_RESPONSE.format(resp))
     return resp
 def compress_data(c, instruct, history):
-    seed=random.randint(1,1000000000)
-    print (c)
-    #tot=len(purpose)
-    #print(tot)
-    divr=int(c)/MAX_DATA
-    divi=int(divr)+1 if divr != int(divr) else int(divr)
     chunk = int(int(c)/divr)
-    print(f'chunk:: {chunk}')
-    print(f'divr:: {divr}')
-    print (f'divi:: {divi}')
     out = []
-    #out=""
-    s=0
-    e=chunk
-    print(f'e:: {e}')
-    new_history=""
-    #task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
     for z in range(divi):
-        print(f's:e :: {s}:{e}')
         hist = history[s:e]
         resp = run_gpt(
             COMPRESS_DATA_PROMPT_SMALL,
             stop_tokens=["observation:", "task:", "action:", "thought:"],
@@ -199,506 +146,172 @@ def compress_data(c, instruct, history):
             history=hist,
         )
         out.append(resp)
-        #new_history = resp
-        print (resp)
-        #out+=resp
-        e=e+chunk
-        s=s+chunk
     return out
-def compress_data_og(c, instruct, history):
-    seed=random.randint(1,1000000000)
-    print (c)
-    #tot=len(purpose)
-    #print(tot)
-    divr=int(c)/MAX_DATA
-    divi=int(divr)+1 if divr != int(divr) else int(divr)
-    chunk = int(int(c)/divr)
-    print(f'chunk:: {chunk}')
-    print(f'divr:: {divr}')
-    print (f'divi:: {divi}')
-    out = []
-    #out=""
-    s=0
-    e=chunk
-    print(f'e:: {e}')
-    new_history=""
-    #task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
-    for z in range(divi):
-        print(f's:e :: {s}:{e}')
-        hist = history[s:e]
-        resp = run_gpt(
-            COMPRESS_DATA_PROMPT,
-            stop_tokens=["observation:", "task:", "action:", "thought:"],
-            max_tokens=8192,
-            seed=seed,
-            direction=instruct,
-            knowledge=new_history,
-            history=hist,
-        )
-        new_history = resp
-        print (resp)
-        out+=resp
-        e=e+chunk
-        s=s+chunk
-    '''
-    resp = run_gpt(
-        COMPRESS_DATA_PROMPT,
-        stop_tokens=["observation:", "task:", "action:", "thought:"],
-        max_tokens=8192,
-        seed=seed,
-        direction=instruct,
-        knowledge=new_history,
-        history="All data has been recieved.",
-    )'''
-    print ("final" + resp)
-    #history = "observation: {}\n".format(resp)
-    return resp
-def summarize(
-    inp: str,
-    history: list,
-    report_check: bool,
-    sum_mem_check: str,
-    data: str = None,
-    files: list = None,
-    url: str = None,
-    pdf_url: str = None,
-    pdf_batch: str = None
-) -> str:
-    """
-    Summarizes the provided input data, processes files, URLs, and PDFs, and yields the results.
-    Parameters:
-    - inp (str): The input data to be processed. If empty, defaults to "Process this data".
-    - history (list): A list to keep track of the conversation history.
-    - report_check (bool): A flag indicating whether to return a report.
-    - sum_mem_check (str): A string indicating whether to summarize or save memory.
-    - data (str, optional): Additional data to process. Defaults to None.
-    - files (list, optional): A list of file paths to process. Defaults to None.
-    - url (str, optional): A URL to fetch data from. Defaults to None.
-    - pdf_url (str, optional): A URL pointing to a PDF file to read. Defaults to None.
-    - pdf_batch (str, optional): A batch of PDF URLs (comma-separated) to read. Defaults to None.
-    Yields:
-    - A tuple containing:
-        - An empty string (for future use).
-        - The updated history list.
-        - An error box (if any errors occurred).
-        - A JSON box for structured output.
-    The function processes the input data, reads from specified URLs, PDFs, and files, and summarizes or saves the data based on the provided parameters.
-    """
-    json_box = []
-    rawp = ""
-    json_out = None
-    if inp == "":
-        inp = "Process this data"
-    history.clear()
-    history = [(inp, "Working on it...")]
-    yield "", history, error_box, json_box
-    # Process PDF batch URLs
-    if pdf_batch and pdf_batch.startswith("http"):
-        c = pdf_batch.count(",") + 1  # Count the number of URLs
-        data = ""
-        try:
-            for i in range(c):
-                batch_url = pdf_batch.split(",", c)[i]
-                bb = read_pdf_online(batch_url)
-                data = f'{data}\nFile Name URL ({batch_url}):\n{bb}'
-        except Exception as e:
-            print(e)
-    # Process single PDF URL
-    if pdf_url and pdf_url.startswith("http"):
-        print("PDF_URL")
-        out = read_pdf_online(pdf_url)
-        data = out
-    # Process regular URL
-    if url and url.startswith("http"):
-        val, out = find_all(inp, "", history, url, "")  # Add missing arguments
-        if not val:
-            data = "Error"
-            rawp = str(out)  # Assign rawp here
         else:
-            data = out
-    # Process uploaded files
-    if files:
-        for i, file in enumerate(files):
-            try:
-                print(file)
-                if file.endswith(".pdf"):
-                    zz = read_pdf(file)
-                    print(zz)
-                    data = f'{data}\nFile Name ({file}):\n{zz}'
-                elif file.endswith(".txt"):
-                    zz = read_txt(file)
-                    print(zz)
-                    data = f'{data}\nFile Name ({file}):\n{zz}'
-            except Exception as e:
-                data = f'{data}\nError opening File Name ({file})'
-                print(e)
-    # Process the collected data
-    if data != "Error" and data != "":
-        print(inp)
-        out = str(data)
-        rl = len(out)
-        print(f'rl:: {rl}')
-        c = sum(1 for i in str(out) if i in [" ", ",", "\n"])  # Count delimiters
-        print(f'c:: {c}')
-        if sum_mem_check == "Memory":
-            json_out = save_memory(inp, out)
-            rawp = "Complete"  # Assign rawp here
-        if sum_mem_check == "Summarize":
-            json_out = compress_data(c, inp, out)
-            out = str(json_out)
-            if report_check:
-                rl = len(out)
-                print(f'rl:: {rl}')
-                c = sum(1 for i in str(out) if i in [" ", ",", "\n"])  # Count delimiters
-                print(f'c2:: {c}')
-                rawp = compress_data_og(c, inp, out)  # Assign rawp here
             else:
-                rawp = out  # Assign rawp here
-    else:
-        rawp = "Provide a valid data source"  # Assign rawp here
-    history.clear()
-    history.append((inp, rawp))
-    yield "", history, error_box, json_out
-SAVE_MEMORY = """
-You are attempting to complete the task
-task: {task}
-Data:
-{history}
-Instructions:
-Compile and categorize the data above into a JSON dictionary string
-Include ALL text, datapoints, titles, descriptions, and source urls indexed into an easy to search JSON format
-Your final response should be only the final formatted JSON string enclosed in brackets, and nothing else.
-Required keys:
-"keywords":["short", "list", "of", "important", "keywords", "found", "in", "this", "entry"]
-"title":"title of entry"
-"description":"A sentence summarizing the topic of this entry"
-"content":"A brief paragraph summarizing the important datapoints found in this entry"
-"url":"https://url.source"
-"""
-def save_memory(purpose, history):
-    uid=uuid.uuid4()
-    history=str(history)
-    c=1
-    inp = str(history)
-    rl = len(inp)
-    print(f'rl:: {rl}')
-    for i in str(inp):
-        if i == " " or i=="," or i=="\n" or i=="/" or i=="\\" or i=="." or i=="<":
-            c +=1
-    print (f'c:: {c}')
-    seed=random.randint(1,1000000000)
-    print (c)
-    #tot=len(purpose)
-    #print(tot)
-    divr=int(c)/MAX_DATA
-    divi=int(divr)+1 if divr != int(divr) else int(divr)
-    chunk = int(int(c)/divr)
-    print(f'chunk:: {chunk}')
-    print(f'divr:: {divr}')
-    print (f'divi:: {divi}')
-    out_box = []
-    #out=""
-    s=0
-    ee=chunk
-    print(f'e:: {ee}')
-    new_history=""
-    task = f'Index this Data\n'
-    for z in range(divi):
-        print(f's:e :: {s}:{ee}')
-        hist = inp[s:ee]
-        resp = run_gpt(
-            SAVE_MEMORY,
-            stop_tokens=["observation:", "task:", "action:", "thought:"],
-            max_tokens=4096,
-            seed=seed,
-            purpose=purpose,
-            task=task,
-            history=hist,
-        ).strip('\n')
-        #new_history = resp
-        #print (resp)
-        #out+=resp
-        #print ("final1" + resp)
-        try:
-            resp='[{'+resp.split('[{')[1].split('</s>')[0]
-            #print ("final2\n" + resp)
-            #print(f"keywords:: {resp['keywords']}")
-        except Exception as e:
-            resp = resp
-            print(e)
-        timestamp=str(datetime.datetime.now())
-        timename=timestamp.replace(" ","--").replace(":","-").replace(".","-")
-        json_object=resp
-        #json_object = json.dumps(out_box)
-        #json_object = json.dumps(out_box,indent=4)
-        with open(f"tmp-{uid}.json", "w") as outfile:
-            outfile.write(json_object)
-        outfile.close()
-        api.upload_file(
-        path_or_fileobj=f"tmp-{uid}.json",
-        path_in_repo=f"/mem-test2/{timename}---{s}-{ee}.json",
-        repo_id=reponame,
-        #repo_id=save_data.split('datasets/',1)[1].split('/raw',1)[0],
-        token=token_self,
-        repo_type="dataset",
-        )
-        lines = resp.strip().strip("\n").split("\n")
-        r = requests.get(f'{save_data}mem-test2/main.json')
-        print(f'status code main:: {r.status_code}')
-        if r.status_code==200:
-            lod = json.loads(r.text)
-            #lod = eval(lod)
-            print (f'lod:: {lod}')
-        if not r.status_code==200:
-            lod = []
-        for i,line in enumerate(lines):
-            key_box=[]
-            print(f'LINE:: {line}')
-            if ":" in line:
-                print(f'line:: {line}')
-            if "keywords" in line:
-                print(f'trying:: {line}')
-                keyw=line.split(":")[1]
-                print (keyw)
-                print (keyw.split("[")[1].split("]")[0])
-                keyw=keyw.split("[")[1].split("]")[0]
-                for ea in keyw.split(","):
-                    s1=""
-                    ea=ea.strip().strip("\n")
-                    for ev in ea:
-                        if ev.isalnum():
-                            s1+=ev
-                        if ev == " ":
-                            s1+=ev
-                        #ea=s1
-                    print(s1)
-                    key_box.append(s1)
-                lod.append({"file_name":f"{timename}---{s}-{ee}","keywords":key_box,"index":f"{s}:{ee}"})
-                json_object = json.dumps(lod, indent=4)
-                with open(f"tmp2-{uid}.json", "w") as outfile2:
-                    outfile2.write(json_object)
-                outfile2.close()
-                api.upload_file(
-                path_or_fileobj=f"tmp2-{uid}.json",
-                path_in_repo=f"/mem-test2/main.json",
-                repo_id=reponame,
-                #repo_id=save_data.split('datasets/',1)[1].split('/raw',1)[0],
-                token=token_self,
-                repo_type="dataset",
-                )
-        ee=ee+chunk
-        s=s+chunk
-        out_box.append(resp)
-    return out_box
-def create_zip_file(output_data, zip_name):
-    with zipfile.ZipFile(zip_name, 'w') as zipf:
-        for i, data in enumerate(output_data):
-            zipf.writestr(f'data_{i}.txt', data)
-    return zip_name
 def clear_fn():
-    return "", [(None, None)]
 with gr.Blocks() as app:
     gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3></center>""")
     # Main chat interface
-    chatbot = gr.Chatbot(
-        label="Mixtral 8x7B Chatbot",
-        show_copy_button=True,
-        type='messages',
-        height=400,
-        purpose_input = gr.Textbox(label="Purpose"),
-        task_input = gr.Textbox(label="Task"),
-        history_input = gr.Textbox(label="History"),
-        url_input = gr.Textbox(label="URL"),
-        result_input = gr.Textbox(label="Result"),
-        steps_input = gr.Number(label="Steps", value=3),  # Default value of 3 steps
-        output_component = gr.Textbox(label="Output"),
-        button = gr.Button("Search"),
-    )
     # Control Panel
     with gr.Row():
         with gr.Column(scale=3):
             prompt = gr.Textbox(
-                label="Instructions (optional)",
                 placeholder="Enter processing instructions here..."
             )
             steps = gr.Slider(
-                label="Crawl Steps",
-                minimum=1,
-                maximum=5,
                 value=1,
                 info="Number of levels to crawl for web content"
             )
         with gr.Column(scale=1):
             report_check = gr.Checkbox(
-                label="Return Report",
                 value=True,
                 info="Generate detailed analysis report"
             )
             sum_mem_check = gr.Radio(
-                label="Output Type",
-                choices=["Summary", "Memory"],
                 value="Summary",
                 info="Choose between summarized or memory-based output"
             )
-            button = gr.Button("Process", variant="primary")
-    # Clear button
-    with gr.Row():
-        clear_btn = gr.Button("Clear", variant="secondary")
     # Input Tabs
     with gr.Tabs() as input_tabs:
         with gr.Tab("📝 Text"):
-            data = gr.Textbox(
-                label="Input Data",
                 lines=6,
                 placeholder="Paste your text here..."
             )
         with gr.Tab("📁 File"):
-            files = gr.File(
                 label="Upload Files",
                 file_types=[".pdf", ".txt"],
                 file_count="multiple"
             )
         with gr.Tab("🌐 Web URL"):
-            url = gr.Textbox(
                 label="Website URL",
                 placeholder="https://example.com"
             )
         with gr.Tab("📄 PDF URL"):
-            pdf_url = gr.Textbox(
                 label="PDF URL",
                 placeholder="https://example.com/document.pdf"
             )
-        with gr.Tab("📚 PDF Batch"):
-            pdf_batch = gr.Textbox(
-                label="PDF URLs (comma separated)",
-                placeholder="url1.pdf, url2.pdf, url3.pdf"
-            )
     # Output Section
     with gr.Row():
         with gr.Column():
-            json_out = gr.JSON(
                 label="Structured Output",
                 show_label=True
             )
         with gr.Column():
-            e_box = gr.Textbox(
-                label="Status & Errors",
                 interactive=False
             )
-    def process_and_format_response(instructions, chat_history, report, summary_memory,
-                                  input_data, uploaded_files, input_url, pdf_input_url):  # Removed extra parameters
-        try:
-            # Process the inputs with reduced parameters
-            result = None
-            for _ in summarize(
-                instructions,
-                chat_history if chat_history else [],
-                report,
-                summary_memory,
-                input_data,
-                uploaded_files,
-                input_url,
-                pdf_input_url  # Removed extra parameters
-            ):
-                result = _
-            if result:
-                _, history, errors, json_data = result
-                # Convert history to ChatMessage format
-                formatted_messages = []
-                if isinstance(history, list):
-                    for msg in history:
-                        if isinstance(msg, tuple) and len(msg) == 2:
-                            formatted_messages.extend([
-                                gr.ChatMessage(content=str(msg[0]), role="user"),
-                                gr.ChatMessage(content=str(msg[1]), role="assistant")
-                            ])
-                else:
-                    formatted_messages.extend([
-                        gr.ChatMessage(content=str(instructions), role="user"),
-                        gr.ChatMessage(content=str(history), role="assistant")
-                    ])
-                # Format error messages
-                error_message = "\n".join(errors) if errors else "Processing completed successfully"
-                return (
-                    "",  # Clear the prompt
-                    formatted_messages,
-                    error_message,
-                    json_data
-                )
-        except Exception as e:
-            error_msg = f"Error: {str(e)}"
-            return (
-                "",
-                [
-                    gr.ChatMessage(content=str(instructions), role="user"),
-                    gr.ChatMessage(content=error_msg, role="assistant")
-                ],
-                error_msg,
-                None
-            )
-    def clear_fn():
-        return "", []
-    # Update the button click event to match parameters
-    button.click(
-        find_all,
         inputs=[
-            purpose_input,    # Add these input components to your Gradio interface
-            task_input,
-            history_input,
             url_input,
-            result_input,
-            steps_input
         ],
-        outputs=[output_component]
     )
     # Launch the app
@@ -706,5 +319,5 @@ with gr.Blocks() as app:
         show_api=False,
         share=True,
         server_name="0.0.0.0",
-        server_port=7860
-)

 import gradio as gr
 import requests
 import zipfile
 import uuid
 import bs4
 import lxml
 import os
+from huggingface_hub import InferenceClient, HfApi
 import random
 import json
 import datetime
 from pypdf import PdfReader
 from agent import (
     PREFIX,
     COMPRESS_DATA_PROMPT,
     LOG_PROMPT,
     LOG_RESPONSE,
 )
# Initialize Hugging Face client
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
reponame = "acecalisto3/tmp"
save_data = f'https://huggingface.co/datasets/{reponame}/raw/main/'
# Read the HF token from the environment. When it is missing we pass None
# instead of a placeholder string: a made-up token would be sent to the Hub
# as a real credential, whereas None lets the library fall back to anonymous
# access (or a locally saved login).
token_self = os.environ.get('HF_TOKEN') or None
if token_self is None:
    print("Warning: Running in demo mode without HuggingFace token. Some features may be limited.")
api = HfApi(token=token_self)
# Constants
VERBOSE = True      # print the rendered prompt and the model response
MAX_HISTORY = 100
MAX_DATA = 20000    # chunk-size divisor used by compress_data
 def find_all(purpose, task, history, url, result, steps):
     return_list = []
     return True, return_list
 def read_txt(txt_path):
     """Return the full contents of the text file at ``txt_path``.

     The file is decoded as UTF-8 regardless of the platform's default
     encoding, and undecodable bytes are replaced instead of raising, so an
     uploaded file with a few stray bytes still yields usable text.

     Raises:
         OSError: if the file cannot be opened.
     """
     with open(txt_path, "r", encoding="utf-8", errors="replace") as f:
         return f.read()
 def read_pdf(pdf_path):
     """Return the text of every page of the PDF at ``pdf_path``.

     Each page's text is prefixed with a newline, so the result for a
     non-empty document starts with ``"\\n"``; a document with no pages
     yields ``""``.
     """
     reader = PdfReader(pdf_path)
     # Join once instead of rebuilding the whole string per page; ``or ''``
     # keeps a page with no extractable text from contributing "None".
     return "".join(f"\n{page.extract_text() or ''}" for page in reader.pages)
+error_box = []
 def read_pdf_online(url):
     """Download the PDF at ``url`` and return its extracted text.

     On HTTP 200 the text of every page is returned, each page prefixed with
     a newline. On any other status the URL is appended to the module-level
     ``error_box`` list and the status code is returned as a string.

     Raises:
         requests.RequestException: on connection failure or timeout.
     """
     import io  # local import: only this function needs an in-memory buffer

     print(f"reading {url}")
     # The whole body is needed, so streaming buys nothing; the timeout keeps
     # an unresponsive server from blocking the worker indefinitely.
     response = requests.get(url, timeout=30)
     if response.status_code != 200:
         error_box.append(url)
         return str(response.status_code)
     # Parse from memory rather than via a shared "test.pdf" on disk, which
     # concurrent requests would overwrite under each other.
     reader = PdfReader(io.BytesIO(response.content))
     return "".join(f"\n{page.extract_text() or ''}" for page in reader.pages)
 def format_prompt(message, history):
     """Render a chat history plus a new message as one ``[INST]`` prompt.

     Args:
         message: the new user message, placed last.
         history: iterable of ``(user_prompt, bot_response)`` pairs, oldest
             first; ``None`` or empty means no prior turns.

     Returns:
         ``"<s>"`` followed by one ``[INST] user [/INST] bot</s> `` segment
         per history pair and a final ``[INST] message [/INST]``.
     """
     turns = [
         f"[INST] {user_prompt} [/INST] {bot_response}</s> "
         for user_prompt, bot_response in (history or [])
     ]
     return "<s>" + "".join(turns) + f"[INST] {message} [/INST]"
+def run_gpt(prompt_template, stop_tokens, max_tokens, seed, **prompt_kwargs):
+    timestamp = datetime.datetime.now()
     generate_kwargs = dict(
         temperature=0.9,
         timestamp=timestamp,
         purpose="Compile the provided data and complete the users task"
     ) + prompt_template.format(**prompt_kwargs)
     if VERBOSE:
         print(LOG_PROMPT.format(content))
     stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
     resp = ""
     for response in stream:
         resp += response.token.text
     if VERBOSE:
         print(LOG_RESPONSE.format(resp))
     return resp
 def compress_data(c, instruct, history):
+    seed = random.randint(1, 1000000000)
+    divr = int(c)/MAX_DATA
+    divi = int(divr)+1 if divr != int(divr) else int(divr)
     chunk = int(int(c)/divr)
     out = []
+    s = 0
+    e = chunk
     for z in range(divi):
         hist = history[s:e]
         resp = run_gpt(
             COMPRESS_DATA_PROMPT_SMALL,
             stop_tokens=["observation:", "task:", "action:", "thought:"],
             history=hist,
         )
         out.append(resp)
+        e = e+chunk
+        s = s+chunk
     return out
def create_zip_file(output_data, zip_name):
    """Write each item of ``output_data`` into a new zip archive.

    Item ``i`` is stored as ``data_<i>.txt``. ``str`` and ``bytes`` items are
    written as-is; anything else is converted with ``str()`` first, so a list
    of mixed results does not abort the archive half-written.

    Args:
        output_data: iterable of items to archive.
        zip_name: path of the archive to create (overwritten if present).

    Returns:
        ``zip_name``, unchanged.
    """
    # The entries are text, so deflate them instead of the default "stored".
    with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
        for i, data in enumerate(output_data):
            if not isinstance(data, (str, bytes)):
                data = str(data)
            zipf.writestr(f"data_{i}.txt", data)
    return zip_name
def process_and_format_response(instructions, chat_history, report, summary_memory,
                                input_data, uploaded_files, input_url, pdf_input_url):
    """Summarize the first available input source and format it for the UI.

    Sources are tried in this order and the first non-empty one wins:
    website URL, uploaded files, pasted text, PDF URL.

    Args:
        instructions: summarization instructions; a default is used if empty.
        chat_history, report, summary_memory: accepted so the signature
            matches the click handler's inputs; not used here.
        input_data: pasted text.
        uploaded_files: uploaded files, either path strings or objects with
            a ``name`` attribute holding the path.
        input_url: website URL handed to ``find_all``.
        pdf_input_url: URL of a PDF handed to ``read_pdf_online``.

    Returns:
        A 4-tuple ``(prompt_value, chat_pairs, status_text, json_payload)``.
        On failure ``chat_pairs`` is ``[["Error", <message>]]`` and
        ``json_payload`` is ``None``. The function never returns ``None``.
    """
    def failure(chat_text, status_text):
        # Single place that builds the error-shaped 4-tuple.
        return "", [["Error", chat_text]], status_text, None

    try:
        if input_url:
            success, content = find_all("Extract content", "", [], input_url, "", 1)
            if not (success and content):
                return failure("Failed to fetch URL content", "URL processing failed")
            if isinstance(content, str):
                processed_text = content
            else:
                processed_text = "\n".join(str(item) for item in content)
        elif uploaded_files:
            parts = []
            for file in uploaded_files:
                # Accept both plain path strings and wrappers exposing .name.
                path = str(getattr(file, "name", file))
                suffix = path.lower()
                if suffix.endswith(".pdf"):
                    parts.append(read_pdf(path))
                elif suffix.endswith(".txt"):
                    parts.append(read_txt(path))
            processed_text = "".join(f"{part}\n\n" for part in parts)
        elif input_data:
            processed_text = input_data
        elif pdf_input_url:
            # read_pdf_online reports a failed download by appending the URL
            # to error_box and returning the status code as text.
            errors_before = len(error_box)
            processed_text = read_pdf_online(pdf_input_url)
            if pdf_input_url in error_box[errors_before:]:
                return failure(
                    f"Failed to fetch PDF (HTTP {processed_text})",
                    "PDF URL processing failed",
                )
        else:
            return failure("No input provided", "No input data")

        # Nothing usable (e.g. only unsupported file types, or blank text):
        # report it instead of falling off the end and returning None.
        if not processed_text or not processed_text.strip():
            return failure(
                "No readable text found in the provided input",
                "No text extracted",
            )

        # compress_data slices the text by character offsets, so it must be
        # given the character length; a word count makes it drop the tail.
        c = len(processed_text)
        summary = compress_data(c, instructions or "Summarize this text", processed_text)
        if isinstance(summary, list):
            summary_text = "\n".join(str(piece) for piece in summary)
        else:
            summary_text = str(summary)

        # Show at most 500 characters of the input; mark truncation only
        # when something was actually cut off.
        preview = processed_text[:500]
        if len(processed_text) > 500:
            preview += "..."
        messages = [
            ["Input", preview],
            ["Summary", summary_text],
        ]
        json_output = {
            "input_length": len(processed_text),
            "summary_length": len(summary_text),
            "summary": summary_text,
        }
        return "", messages, "Processing completed successfully", json_output
    except Exception as e:
        # UI boundary: surface any failure in the chat and status box rather
        # than letting the event handler crash.
        error_msg = f"Error: {str(e)}"
        return failure(error_msg, error_msg)
 def clear_fn():
     """Return an empty string and an empty list (blank prompt, blank chat)."""
     blank_prompt = ""
     blank_history = []
     return blank_prompt, blank_history
+# Create Gradio interface
 with gr.Blocks() as app:
     gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3></center>""")
     # Main chat interface
+    with gr.Row():
+        chatbot = gr.Chatbot(
+            label="Mixtral 8x7B Chatbot",
+            show_copy_button=True,
+            height=400
+        )
     # Control Panel
     with gr.Row():
         with gr.Column(scale=3):
             prompt = gr.Textbox(
+                label="Instructions",
                 placeholder="Enter processing instructions here..."
             )
             steps = gr.Slider(
+                label="Crawl Steps",
+                minimum=1,
+                maximum=5,
                 value=1,
                 info="Number of levels to crawl for web content"
             )
         with gr.Column(scale=1):
             report_check = gr.Checkbox(
+                label="Return Report",
                 value=True,
                 info="Generate detailed analysis report"
             )
             sum_mem_check = gr.Radio(
+                label="Output Type",
+                choices=["Summary", "Memory"],
                 value="Summary",
                 info="Choose between summarized or memory-based output"
             )
+            process_btn = gr.Button("Process", variant="primary")
     # Input Tabs
     with gr.Tabs() as input_tabs:
         with gr.Tab("📝 Text"):
+            text_input = gr.Textbox(
+                label="Input Text",
                 lines=6,
                 placeholder="Paste your text here..."
             )
         with gr.Tab("📁 File"):
+            file_input = gr.File(
                 label="Upload Files",
                 file_types=[".pdf", ".txt"],
                 file_count="multiple"
             )
         with gr.Tab("🌐 Web URL"):
+            url_input = gr.Textbox(
                 label="Website URL",
                 placeholder="https://example.com"
             )
         with gr.Tab("📄 PDF URL"):
+            pdf_url_input = gr.Textbox(
                 label="PDF URL",
                 placeholder="https://example.com/document.pdf"
             )
     # Output Section
     with gr.Row():
         with gr.Column():
+            json_output = gr.JSON(
                 label="Structured Output",
                 show_label=True
             )
         with gr.Column():
+            error_output = gr.Textbox(
+                label="Status & Errors",
                 interactive=False
             )
+    # Event handlers
+    process_btn.click(
+        process_and_format_response,
         inputs=[
+            prompt,
+            chatbot,
+            report_check,
+            sum_mem_check,
+            text_input,
+            file_input,
             url_input,
+            pdf_url_input
         ],
+        outputs=[
+            prompt,
+            chatbot,
+            error_output,
+            json_output
+        ]
     )
     # Launch the app
         show_api=False,
         share=True,
         server_name="0.0.0.0",
+        server_port=8000
+    )