import os
import asyncio
import logging

import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import ArxivLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceHubEmbeddings
from langchain_groq import ChatGroq
from PyPDF2 import PdfReader
from huggingface_hub import login
from groq import AsyncGroq, Groq

# Load environment variables
load_dotenv()
HUGGING_API_KEY = os.getenv("HUGGING_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Ensure API keys are set
if not HUGGING_API_KEY or not GROQ_API_KEY:
    raise ValueError("API keys for HuggingFace or Groq are missing. Set them in your environment variables.")

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Authenticate with Hugging Face
login(HUGGING_API_KEY)

# Load models and embeddings
embedding_model = HuggingFaceHubEmbeddings(huggingfacehub_api_token=HUGGING_API_KEY)
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=GROQ_API_KEY)
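# Note: `llm` is not used by the handlers below (they call the Groq SDK
# directly); it is kept available for LangChain chains such as RetrievalQA.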

def display_results(result):
    """Join a list of result lines into a single Markdown string."""
    return "\n".join(result)

def summarize_text(text):
    """Summarize text using the Groq API."""
    try:
        sum_client = Groq(api_key=GROQ_API_KEY)
        messages = [
            {"role": "system", "content": "You are an excellent analyst who excels in summarization task. If I give you the whole text, you should summarize it."},
            {"role": "user", "content": f"Summarize the paper: {text}"}
        ]

        # Cap the completion so prompt + output stay within the model's
        # 8,192-token context window (the chunk sizes used below assume this).
        response = sum_client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=2048,
            top_p=1,
        )
        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"Error summarizing text: {e}")
        return "Error in summarization."

def summarize_pdf(pdf_file_path, max_length):
    """Extract text from a PDF and summarize it chunk by chunk.

    Note: max_length is accepted for UI compatibility but is not forwarded to
    the model; summary length is governed by max_tokens in summarize_text.
    """
    try:
        reader = PdfReader(pdf_file_path)
        text = "\n".join(page.extract_text() or "" for page in reader.pages)

        # Keep chunks well below the model's 8,192-token context so the
        # prompt and completion still fit alongside each chunk.
        text_splitter = TokenTextSplitter(chunk_size=4096, chunk_overlap=200)
        chunks = text_splitter.split_text(text)

        # Summarize each chunk independently and stitch the results together.
        return "\n\n".join(summarize_text(chunk) for chunk in chunks)

    except Exception as e:
        logger.error(f"Error summarizing PDF: {e}")
        return "Failed to process the PDF."

def summarize_arxiv_pdf(query):
    """Summarize an arXiv paper given a query."""
    try:
        loader = ArxivLoader(query=query, load_max_docs=10)
        documents = loader.load()
        text_splitter = TokenTextSplitter(chunk_size=5700, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)

        ref_summary = ""
        for chunk in chunks:
            ref_summary += summarize_text(chunk.page_content)

        arxiv_summary = loader.get_summaries_as_docs()

        summaries = []
        for doc in arxiv_summary:
            title = doc.metadata.get("Title", "Unknown Title")
            authors = doc.metadata.get("Authors", "Unknown Authors")
            url = doc.metadata.get("Entry ID", "No URL")

            summaries.append(f"**{title}**\n")
            summaries.append(f"**Authors:** {authors}\n")
            summaries.append(f"**View full paper:** [Link to paper]({url})\n")
            summaries.append(f"**Summary:** {doc.page_content}\n")
            summaries.append(f"**Enhanced Summary:**\n {ref_summary}")

        return display_results(summaries)

    except Exception as e:
        logger.error(f"Error summarizing arXiv paper: {e}")
        return "Failed to process arXiv paper."

client = AsyncGroq(api_key=GROQ_API_KEY)

async def chat_with_replit(message, history):
    """Chat functionality using Groq API."""
    try:
        messages = [{"role": "system", "content": "You are an assistant answering user questions."}]
        
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

        messages.append({"role": "user", "content": message})

        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,  # Using non-streaming for simplicity in this integration.
        )
        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return "Error in chat response."

async def chat_with_replit_pdf(message, history, doi_num):
    """Chat with arXiv papers using document retrieval."""
    try:
        loader = ArxivLoader(query=str(doi_num), load_max_docs=10)
        documents = loader.load_and_split()
        metadata = documents[0].metadata

        vector_store = Chroma.from_documents(documents, embedding_model)

        def retrieve_relevant_content(user_query):
            results = vector_store.similarity_search(user_query, k=3)
            return "\n\n".join(doc.page_content for doc in results)

        relevant_content = retrieve_relevant_content(message)

        # Put the system prompt first so the grounding context precedes the question.
        messages = [
            {"role": "system", "content": f"Answer based on this arXiv paper {doi_num}.\n"
                                          f"Metadata: {metadata}.\n"
                                          f"Relevant Content: {relevant_content}"},
            {"role": "user", "content": message},
        ]

        response = await client.chat.completions.create(
            messages=messages,
            model="llama3-70b-8192",
            temperature=0,
            max_tokens=1024,
            top_p=1,
            stream=False,
        )
        return response.choices[0].message.content

    except Exception as e:
        logger.error(f"Error in chat with PDF: {e}")
        return "Error processing chat with PDF."

# Define a synchronous wrapper for the async chat function
def chat_with_replit_sync(message, history):
    return asyncio.run(chat_with_replit(message, history))
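
# Caveat: asyncio.run() raises RuntimeError if an event loop is already
# running (e.g., in a Jupyter notebook); there, await chat_with_replit(...)
# directly instead of using this wrapper.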

# Gradio UI
with gr.Blocks() as app:
    # Tab for Local PDF Summarization
    with gr.Tab(label="Local PDF Summarization"):
        with gr.Row():
            input_pdf = gr.File(label="Upload PDF file")
            max_length_slider = gr.Slider(512, 4096, value=2048, step=512, label="Max Length")
            summarize_pdf_btn = gr.Button(value="Summarize PDF")
        with gr.Row():
            output_pdf_summary = gr.Markdown(label="Summary", height=1000)
        summarize_pdf_btn.click(summarize_pdf, inputs=[input_pdf, max_length_slider], outputs=output_pdf_summary)
    
    # Tab for Arxiv Summarization
    with gr.Tab(label="Arxiv Summarization"):
        with gr.Column():
            arxiv_number = gr.Textbox(label="Enter an arXiv ID, e.g. 2502.02523")
            summarize_btn = gr.Button(value="Summarize arXiv Paper")
        with gr.Column():
            output_summary = gr.Markdown(label="Summary", height=1000)
        summarize_btn.click(summarize_arxiv_pdf, inputs=arxiv_number, outputs=output_summary)

    # New Tab for Chat functionality
    with gr.Tab(label="Chat with Assistant"):
        gr.Markdown("### Chat with the Assistant")
        with gr.Row():
            chat_input = gr.Textbox(placeholder="Type your message here...", label="Your Message")
            send_button = gr.Button("Send")
        # Markdown shows the conversation history (gr.Chatbot would also work).
        chat_output = gr.Markdown(label="Chat Output", height=300)
        # Maintain chat history as a list of [user, assistant] pairs.
        chat_history = gr.State([])

        def format_history(history):
            """Render [user, assistant] pairs as Markdown."""
            return "\n\n".join(f"**User:** {u}\n\n**Assistant:** {a}" for u, a in history)

        def update_chat(user_message, history):
            # Append the new user message with an empty assistant slot for now.
            history = history or []
            history.append([user_message, ""])
            return history, format_history(history)

        def update_assistant_response(history):
            # Send the last user message; earlier turns provide the context.
            user_message = history[-1][0]
            response = chat_with_replit_sync(user_message, history[:-1])
            history[-1][1] = response
            return history, format_history(history)

        # Chain the two steps with .then() so the assistant call always runs
        # after the history update; two independent click handlers can race.
        send_button.click(
            update_chat, inputs=[chat_input, chat_history], outputs=[chat_history, chat_output]
        ).then(
            update_assistant_response, inputs=chat_history, outputs=[chat_history, chat_output]
        )

app.launch()
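
# launch() serves on http://127.0.0.1:7860 by default. To reach the app from
# other machines or to get a temporary public link, Gradio supports, e.g.:
#   app.launch(server_name="0.0.0.0", share=True)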