push chat
Files changed:
- app.py (+25, -9)
- df/PaperCentral.py (+18, -1)
- paper_chat_tab.py (+281, -0)
- requirements.txt (+14, -0)
- style.css (+63, -1)
app.py
CHANGED
@@ -10,6 +10,8 @@ import json
 import requests
 
 from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
+from paper_chat_tab import paper_chat_tab
+
 from zoneinfo import ZoneInfo  # Available in Python 3.9 and later
 
 # Initialize the PaperCentral class instance
@@ -60,6 +62,9 @@ with gr.Blocks(css_paths="style.css") as demo:
         with gr.Column(scale=1):
             with gr.Accordion(label="⭐Release notes", open=False):
                 gr.Markdown("""
+                - **November 21, 2024** – NeurIPS D&B 2024 proceedings added.
+                - **November 20, 2024** – NeurIPS 2024 proceedings added.
+                - **November 15, 2024** – EMNLP 2024 proceedings added.
                 - **October 24, 2024** – CoRL 2024 proceedings added.
                 - **October 20, 2024** – You can now add or edit papers.
                 - **October 19, 2024** – Papers with github now have github stars.
@@ -182,6 +187,12 @@ with gr.Blocks(css_paths="style.css") as demo:
            author_resource_leaderboard_tab()
 
 
+        with gr.Tab("Chat With Paper", id="tab-chat-with-paper"):
+            gr.Markdown("## Chat with Paper")
+            arxiv_id = gr.State(value=None)
+            paper_chat_tab(arxiv_id)
+
+
     # Define function to move to the next day
     def go_to_next_day(
             date: Union[str, datetime],
@@ -468,13 +479,14 @@ with gr.Blocks(css_paths="style.css") as demo:
        date_range = gr.update(value=None)
        conferences = gr.update(value=[])
        hf_options = gr.update(value=[])
-
+       selected_tab = gr.Tabs()
+       paper_id = gr.update(value=None)
 
        if request:
-           print("Request headers dictionary:", dict(request.headers))
-           print("IP address:", request.client.host)
-           print("Query parameters:", dict(request.query_params))
-           print("Session hash:", request.session_hash)
+           # print("Request headers dictionary:", dict(request.headers))
+           # print("IP address:", request.client.host)
+           # print("Query parameters:", dict(request.query_params))
+           # print("Session hash:", request.session_hash)
 
            if 'date' in request.query_params:
                calendar = gr.update(value=request.query_params['date'])
@@ -502,9 +514,13 @@ with gr.Blocks(css_paths="style.css") as demo:
            if "tab" in request.query_params:
                tab = request.query_params['tab']
                if tab == "tab-leaderboards":
+                   selected_tab = gr.Tabs(selected="tab-leaderboards")
+               elif tab == "tab-chat-with-paper":
+                   selected_tab = gr.Tabs(selected="tab-chat-with-paper")
+                   if "paper_id" in request.query_params:
+                       paper_id = request.query_params['paper_id']
 
-       return calendar, date_range, conferences, hf_options,
+       return calendar, date_range, conferences, hf_options, selected_tab, paper_id
 
 
    demo.load(
@@ -514,7 +530,7 @@ with gr.Blocks(css_paths="style.css") as demo:
        api_name="update_data",
    ).then(
        fn=echo,
-       outputs=[calendar, date_range_radio, conference_options, hf_options, tabs],
+       outputs=[calendar, date_range_radio, conference_options, hf_options, tabs, arxiv_id],
        api_name=False,
    ).then(
        # New then to handle LoginButton and HTML components
@@ -529,7 +545,7 @@ def main():
    """
    Launches the Gradio app.
    """
-   demo.launch()
+   demo.launch(ssr_mode=False)
 
 
 # Run the main function when the script is executed
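The tab routing added here reduces to a small self-contained pattern. The sketch below is illustrative rather than part of the commit; the tab ids and handler name are placeholders, and it assumes Gradio 5.x behavior where returning gr.Tabs(selected=...) from a demo.load handler switches the active tab.

import gradio as gr

# Minimal sketch: read ?tab=... from the request on page load and
# switch the selected tab accordingly. Ids are placeholders.
with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        with gr.Tab("Papers", id="tab-papers"):
            gr.Markdown("Papers tab")
        with gr.Tab("Chat With Paper", id="tab-chat-with-paper"):
            gr.Markdown("Chat tab")

    def route(request: gr.Request):
        selected = gr.Tabs()  # default: leave the selection unchanged
        if request and "tab" in request.query_params:
            selected = gr.Tabs(selected=request.query_params["tab"])
        return selected

    demo.load(fn=route, outputs=[tabs], api_name=False)

demo.launch()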
df/PaperCentral.py
CHANGED
@@ -15,7 +15,7 @@ import gradio as gr
 from utils import load_and_process
 import numpy as np
 from datetime import datetime, timedelta
-
+import re
 
 class PaperCentral:
     """
@@ -53,6 +53,7 @@ class PaperCentral:
     ]
 
     COLUMNS_ORDER_PAPER_PAGE: List[str] = [
+        'chat_with_paper',
         'date',
         'arxiv_id',
         'paper_page',
@@ -90,6 +91,7 @@ class PaperCentral:
         'authors': 'str',
         'github_stars': 'number',
         'project_page': 'markdown',
+        'chat_with_paper': 'markdown',
     }
 
     # Mapping for renaming columns for display purposes
@@ -101,6 +103,7 @@ class PaperCentral:
         'github_stars': 'GitHub⭐',
         'num_comments': '💬',
         'upvotes': '👍',
+        'chat_with_paper': 'Chat',
     }
 
     def __init__(self):
@@ -475,6 +478,20 @@ class PaperCentral:
             )
             filtered_df = filtered_df[conference_filter]
 
+            if any(conf in ["NeurIPS2024 D&B", "NeurIPS2024"] for conf in conference_options):
+                def create_chat_link(row):
+                    neurips_id = re.search(r'id=([^&]+)', row["proceedings"])
+                    if neurips_id:
+                        neurips_id = neurips_id.group(1)
+                        return f'<a href="/?tab=tab-chat-with-paper&paper_id={neurips_id}" id="custom_button" target="_blank" rel="noopener noreferrer" aria-disabled="false">✨ Chat with paper</a>'
+                    else:
+                        return ""
+
+                # Add the "chat_with_paper" column
+                filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_link, axis=1)
+                if 'chat_with_paper' not in columns_to_show:
+                    columns_to_show.append('chat_with_paper')
+
         # Prettify the DataFrame
         filtered_df = self.prettify(filtered_df)
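The chat link is plain HTML rendered through the dataframe's markdown column, and the id comes straight out of the proceedings URL. A standalone check of that extraction, using a made-up URL:

import re

# Hypothetical proceedings URL containing an OpenReview-style id parameter
proceedings_url = "https://openreview.net/forum?id=AbCdEf123&noteId=xyz"

match = re.search(r'id=([^&]+)', proceedings_url)
if match:
    neurips_id = match.group(1)  # "AbCdEf123"
    link = (f'<a href="/?tab=tab-chat-with-paper&paper_id={neurips_id}" '
            f'id="custom_button">✨ Chat with paper</a>')
    print(link)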
paper_chat_tab.py
ADDED
@@ -0,0 +1,281 @@
+import gradio as gr
+from PyPDF2 import PdfReader
+from bs4 import BeautifulSoup
+
+import requests
+from io import BytesIO
+from transformers import AutoTokenizer
+
+import os
+from openai import OpenAI
+
+# Cache for tokenizers to avoid reloading
+tokenizer_cache = {}
+
+
+# Function to fetch paper information from OpenReview
+def fetch_paper_info_neurips(paper_id):
+    url = f"https://openreview.net/forum?id={paper_id}"
+    response = requests.get(url)
+    if response.status_code != 200:
+        return None
+
+    html_content = response.content
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    # Extract title
+    title_tag = soup.find('h2', class_='citation_title')
+    title = title_tag.get_text(strip=True) if title_tag else 'Title not found'
+
+    # Extract authors
+    authors = []
+    author_div = soup.find('div', class_='forum-authors')
+    if author_div:
+        author_tags = author_div.find_all('a')
+        authors = [tag.get_text(strip=True) for tag in author_tags]
+    author_list = ', '.join(authors) if authors else 'Authors not found'
+
+    # Extract abstract
+    abstract_div = soup.find('strong', text='Abstract:')
+    if abstract_div:
+        abstract_paragraph = abstract_div.find_next_sibling('div')
+        abstract = abstract_paragraph.get_text(strip=True) if abstract_paragraph else 'Abstract not found'
+    else:
+        abstract = 'Abstract not found'
+
+    # Construct preamble in Markdown
+    # preamble = f"**[{title}](https://openreview.net/forum?id={paper_id})**\n\n{author_list}\n\n**Abstract:**\n{abstract}"
+    preamble = f"**[{title}](https://openreview.net/forum?id={paper_id})**\n\n{author_list}\n\n"
+
+    return preamble
+
+
+def fetch_paper_content(paper_id):
+    try:
+        # Construct the URL
+        url = f"https://openreview.net/pdf?id={paper_id}"
+
+        # Fetch the PDF
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception for HTTP errors
+
+        # Read the PDF content
+        pdf_content = BytesIO(response.content)
+        reader = PdfReader(pdf_content)
+
+        # Extract text from the PDF
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+
+        return text  # Return full text; truncation will be handled later
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+
+
+def paper_chat_tab(paper_id):
+    with gr.Blocks() as demo:
+        with gr.Column():
+            # Markdown component to display the paper title and authors
+            content = gr.Markdown(value="")
+
+            # Preamble message to hint the user
+            gr.Markdown("**Note:** Providing your own SambaNova token can help you avoid rate limits.")
+
+            # Input for SambaNova API token
+            hf_token_input = gr.Textbox(
+                label="Enter your SambaNova token (optional)",
+                type="password",
+                placeholder="Enter your SambaNova token to avoid rate limits"
+            )
+
+            models = [
+                "Meta-Llama-3.1-8B-Instruct",
+                "Meta-Llama-3.1-70B-Instruct",
+                "Meta-Llama-3.1-405B-Instruct",
+            ]
+
+            default_model = models[-1]
+
+            # Dropdown for selecting the model
+            model_dropdown = gr.Dropdown(
+                label="Select Model",
+                choices=models,
+                value=default_model
+            )
+
+            # State to store the paper content
+            paper_content = gr.State()
+
+            # Create a column for each model, only visible if it's the default model
+            columns = []
+            for model_name in models:
+                column = gr.Column(visible=(model_name == default_model))
+                with column:
+                    chatbot = create_chat_interface(model_name, paper_content, hf_token_input)
+                columns.append(column)
+            gr.HTML(
+                '<img src="https://venturebeat.com/wp-content/uploads/2020/02/SambaNovaLogo_H_F.jpg" width="100px" />')
+            gr.Markdown("**Note:** This model is supported by SambaNova.")
+
+            # Update visibility of columns based on the selected model
+            def update_columns(selected_model):
+                visibility = []
+                for model_name in models:
+                    is_visible = model_name == selected_model
+                    visibility.append(gr.update(visible=is_visible))
+                return visibility
+
+            model_dropdown.change(
+                fn=update_columns,
+                inputs=model_dropdown,
+                outputs=columns,
+                api_name=False,
+                queue=False,
+            )
+
+            # Function to update the content Markdown and paper_content when paper ID or model changes
+            def update_paper_info(paper_id, selected_model):
+                preamble = fetch_paper_info_neurips(paper_id)
+                text = fetch_paper_content(paper_id)
+                if text is None:
+                    return preamble, None
+
+                return preamble, text
+
+            # Update paper content when paper ID or model changes
+            paper_id.change(
+                fn=update_paper_info,
+                inputs=[paper_id, model_dropdown],
+                outputs=[content, paper_content]
+            )
+
+            model_dropdown.change(
+                fn=update_paper_info,
+                inputs=[paper_id, model_dropdown],
+                outputs=[content, paper_content],
+                queue=False,
+            )
+    return demo
+
+
+def create_chat_interface(model_name, paper_content, hf_token_input):
+    # Load tokenizer and cache it
+    if model_name not in tokenizer_cache:
+        # Load the tokenizer from Hugging Face
+        # tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
+        tokenizer_cache[model_name] = tokenizer
+    else:
+        tokenizer = tokenizer_cache[model_name]
+
+    max_total_tokens = 50000  # Maximum tokens allowed
+
+    # Define the function to handle the chat
+    def get_fn(message, history, paper_content_value, hf_token_value):
+        # Include the paper content as context
+        if paper_content_value:
+            context = f"The following is the content of the paper:\n{paper_content_value}\n\n"
+        else:
+            context = ""
+
+        # Tokenize the context
+        context_tokens = tokenizer.encode(context)
+        context_token_length = len(context_tokens)
+
+        # Prepare the messages without context
+        messages = []
+        message_tokens_list = []
+        total_tokens = context_token_length  # Start with context tokens
+
+        for user_msg, assistant_msg in history:
+            # Tokenize user message
+            user_tokens = tokenizer.encode(user_msg)
+            messages.append({"role": "user", "content": user_msg})
+            message_tokens_list.append(len(user_tokens))
+            total_tokens += len(user_tokens)
+
+            # Tokenize assistant message
+            if assistant_msg:
+                assistant_tokens = tokenizer.encode(assistant_msg)
+                messages.append({"role": "assistant", "content": assistant_msg})
+                message_tokens_list.append(len(assistant_tokens))
+                total_tokens += len(assistant_tokens)
+
+        # Tokenize the new user message
+        message_tokens = tokenizer.encode(message)
+        messages.append({"role": "user", "content": message})
+        message_tokens_list.append(len(message_tokens))
+        total_tokens += len(message_tokens)
+
+        # Check if total tokens exceed the maximum allowed tokens
+        if total_tokens > max_total_tokens:
+            # Attempt to truncate the context first
+            available_tokens = max_total_tokens - (total_tokens - context_token_length)
+            if available_tokens > 0:
+                # Truncate the context to fit the available tokens
+                truncated_context_tokens = context_tokens[:available_tokens]
+                context = tokenizer.decode(truncated_context_tokens)
+                context_token_length = available_tokens
+                total_tokens = total_tokens - len(context_tokens) + context_token_length
+            else:
+                # Not enough space for context; remove it
+                context = ""
+                total_tokens -= context_token_length
+                context_token_length = 0
+
+        # If total tokens still exceed the limit, truncate the message history
+        while total_tokens > max_total_tokens and len(messages) > 1:
+            # Remove the oldest message
+            removed_message = messages.pop(0)
+            removed_tokens = message_tokens_list.pop(0)
+            total_tokens -= removed_tokens
+
+        # Rebuild the final messages list including the (possibly truncated) context
+        final_messages = []
+        if context:
+            final_messages.append({"role": "system", "content": context})
+        final_messages.extend(messages)
+
+        # Use the SambaNova token if provided
+        api_key = hf_token_value or os.environ.get("SAMBANOVA_API_KEY")
+        if not api_key:
+            raise ValueError("API token is not provided.")
+
+        # Initialize the OpenAI client
+        client = OpenAI(
+            base_url="https://api.sambanova.ai/v1/",
+            api_key=api_key,
+        )
+
+        try:
+            # Create the chat completion
+            completion = client.chat.completions.create(
+                model=model_name,
+                messages=final_messages,
+                stream=True,
+            )
+            response_text = ""
+            for chunk in completion:
+                delta = chunk.choices[0].delta.content or ""
+                response_text += delta
+                yield response_text
+        except Exception as e:
+            error_message = f"Error: {str(e)}"
+            yield error_message
+
+    # Create the ChatInterface
+    chat_interface = gr.ChatInterface(
+        fn=get_fn,
+        chatbot=gr.Chatbot(
+            label="Chatbot",
+            scale=1,
+            height=400,
+            autoscroll=True
+        ),
+        additional_inputs=[paper_content, hf_token_input],
+        # examples=["What are the main findings of this paper?", "Explain the methodology used in this research."]
+    )
+    return chat_interface
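The budget handling in get_fn interleaves tokenization with trimming, which makes the policy harder to see. Reduced to its essentials it is: trim the paper context to whatever room the history leaves, then drop the oldest turns until everything fits. A sketch of that policy, with plain token lists and counts standing in for real tokenizer output:

# Reduced sketch of the truncation policy in get_fn; token lists and
# per-message counts stand in for real tokenizer output.
def fit_budget(context_tokens, history_token_counts, budget=50000):
    used = sum(history_token_counts)
    # Trim the context to whatever room the history leaves over
    context_tokens = context_tokens[:max(0, budget - used)]
    # Then drop the oldest history entries until everything fits
    while used + len(context_tokens) > budget and history_token_counts:
        used -= history_token_counts.pop(0)
    return context_tokens, history_token_counts

ctx, hist = fit_budget(list(range(60000)), [1000, 2000, 3000])
assert len(ctx) + sum(hist) <= 50000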
requirements.txt
CHANGED
@@ -2,3 +2,17 @@ gradio==5.6.0
 gradio_calendar
 datasets
 scholarly
+arxiv
+PyPDF2
+transformers
+beautifulsoup4
+# Set the primary index URL to PyTorch's CPU wheels
+--index-url https://download.pytorch.org/whl/cpu
+
+# Ensure PyPI is still accessible for other packages
+--extra-index-url https://pypi.org/simple
+
+# List all your packages
+torch
+torchvision
+torchaudio
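One detail worth knowing about this layout: pip treats --index-url and --extra-index-url as options for the install as a whole, wherever they appear in a requirements file, so the torch packages resolve against the CPU wheel index while the remaining packages can still come from PyPI.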
style.css
CHANGED
@@ -57,4 +57,66 @@ body a:hover {
     height: 1.38rem;
     overflow: hidden;
     border-radius: 9999px;
-}
+}
+
+
+/* CSS Variables for Button Styling */
+:root {
+    /* Border and Padding */
+    --button-border-width: 0px;
+    --button-small-padding: 8px 12px; /* Example values */
+    --button-small-radius: 4px; /* Example values */
+
+    /* Colors */
+    --button-secondary-border-color: #e5e7eb; /* Example neutral-200 */
+    --button-secondary-background-fill: #f3f4f6; /* Example neutral-200 */
+    --button-secondary-background-fill-hover: #d1d5db; /* Example neutral-300 */
+    --button-secondary-text-color: #000000;
+    --button-secondary-text-color-hover: #000000;
+
+    /* Typography */
+    --button-small-text-size: 14px; /* Example text-sm */
+    --button-small-text-weight: 400;
+
+    /* Shadows and Transitions */
+    --button-secondary-shadow: none;
+    --button-secondary-shadow-hover: none;
+    --button-secondary-shadow-active: none;
+    --button-transition: all 0.2s ease;
+}
+
+/* Custom Button Styles */
+#custom_button {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    border: var(--button-border-width) solid var(--button-secondary-border-color);
+    background: var(--button-secondary-background-fill);
+    color: var(--button-secondary-text-color);
+    padding: var(--button-small-padding);
+    border-radius: var(--button-small-radius);
+    font-size: var(--button-small-text-size);
+    font-weight: var(--button-small-text-weight);
+    text-decoration: none;
+    box-shadow: var(--button-secondary-shadow);
+    transition: var(--button-transition);
+}
+
+#custom_button:hover {
+    background: var(--button-secondary-background-fill-hover);
+    border-color: var(--button-secondary-border-color-hover);
+    color: var(--button-secondary-text-color-hover);
+    box-shadow: var(--button-secondary-shadow-hover);
+}
+
+#custom_button:active {
+    box-shadow: var(--button-secondary-shadow-active);
+}
+
+/* Icon Styling */
+#custom_button .button-icon {
+    margin-right: 8px; /* Adjust spacing between icon and text as needed */
+    width: 20px; /* Adjust icon size as needed */
+    height: 20px; /* Adjust icon size as needed */
+}
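These rules target the #custom_button id on the anchor emitted by create_chat_link in df/PaperCentral.py, so the "✨ Chat with paper" link renders as a small button inside the dataframe rather than as a bare hyperlink.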