# Call_model / app.py
# Hugging Face Space file view: author disLodge, commit "fx",
# revision a87b35b (verified), 4.11 kB.
import gradio as gr
import requests
from pdfminer.high_level import extract_text
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
from langchain_core.runnables import RunnablePassthrough, Runnable
from io import BytesIO
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from huggingface_hub import InferenceClient
import time
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import logging
import os
# Timestamp (seconds, from time.time()) of the most recently accepted request.
# 0 means no request has been served yet; process_query reads and updates it
# to enforce its 60-second rate limit.
last_call_time = 0

# Credentials come from the environment only (e.g. a Space secret) and must
# never be committed to source control. Any key or token that was previously
# embedded in this file should be treated as leaked and revoked.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
def extract_pdf_text(url: str, timeout: float = 30.0) -> str:
    """Download a PDF and return its text content.

    Args:
        url: HTTP(S) address of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text produced by pdfminer's ``extract_text`` for the downloaded
        bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Without a timeout, requests can block forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the
    # PDF parser.
    response.raise_for_status()
    return extract_text(BytesIO(response.content))
# --- Build the retrieval index once, at import time -------------------------
# Fetch the source paper and wrap its full text in a single Document.
pdf_url = "https://arxiv.org/pdf/2408.09869"
text = extract_pdf_text(pdf_url)
docs_list = [Document(page_content=text)]

# Split by token count (tiktoken) into large, slightly overlapping chunks.
# NOTE(review): 7500-token chunks are presumably much larger than the input
# window of the MiniLM embedding model used below — confirm this is intended.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=7500,
)
docs_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and store them in a Chroma collection.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vectorstore = Chroma.from_documents(
    collection_name="rag-chroma",
    embedding=embeddings,
    documents=docs_splits,
)

# Retriever with the vector store's default search settings.
retriever = vectorstore.as_retriever()
# Chat model that writes the summary. The sampling values below are only the
# startup defaults; process_query overwrites them on every request.
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo",
    temperature=0.7,
    top_p=0.95,
    max_tokens=512,
)
# Prompt for the RAG step. Placeholders: {role} is the persona chosen in the
# UI, {context} is the retrieved document text.
after_rag_template = (
    "You are a {role}. Summarize the following content for yourself and speak in terms of first person.\n"
    "Only include content relevant to that role like a resume summary.\n"
    "Context:\n"
    "{context}\n"
    "Question: Give a one paragraph summary of the key skills a {role} can have from this document.\n"
)
after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
def format_query(input_dict):
    """Build the retrieval query for the role stored under ``input_dict["role"]``.

    Raises:
        KeyError: If ``input_dict`` has no ``"role"`` entry.
    """
    role = input_dict["role"]
    return f"Give a one paragraph summary of the key skills a {role} can have from this document."
def _format_docs(docs):
    """Join the text of the retrieved documents into one prompt-ready string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Pipeline: {"role": ...} -> build the query -> retrieve matching chunks ->
# fill the prompt -> call the chat model -> return a plain string.
after_rag_chain = (
    {
        # The retriever yields Document objects. Without _format_docs the
        # prompt would receive the repr of that list (metadata, escaped
        # newlines) instead of the document text itself.
        "context": format_query | retriever | _format_docs,
        "role": lambda x: x["role"],
    }
    | after_rag_prompt
    | llm
    | StrOutputParser()
)
def process_query(role, system_message, max_tokens, temperature, top_p):
    """Run the RAG summary for ``role``, limited to one call per 60 seconds.

    Args:
        role: Persona to summarise for; passed to the chain as ``{"role": role}``.
        system_message: Accepted because the UI supplies it, but currently
            unused — the prompt template has no slot for it.
        max_tokens: Completion token limit; coerced to ``int`` before use.
        temperature: Sampling temperature assigned to the shared ``llm``.
        top_p: Nucleus-sampling value assigned to the shared ``llm``.

    Returns:
        The summary prefixed with a "**RAG Summary**" heading, or a
        rate-limit notice when the previous accepted call was less than
        60 seconds ago.
    """
    global last_call_time
    import math  # local import keeps this block self-contained

    now = time.time()
    elapsed = now - last_call_time
    if elapsed < 60:
        # Round up: truncating could report "wait 0 seconds" while the call
        # is in fact still being rejected.
        wait_time = math.ceil(60 - elapsed)
        return f"Rate limit exceeded. Please wait {wait_time} seconds before trying again."
    last_call_time = now

    # The settings are written onto the module-level llm, so they are shared
    # by every caller of this function.
    # A slider may hand over a float; a token limit has to be a whole number.
    llm.max_tokens = int(max_tokens)
    llm.temperature = temperature
    llm.top_p = top_p

    after_rag_result = after_rag_chain.invoke({"role": role})
    return f"**RAG Summary**\n{after_rag_result}"
def _clear_output():
    """Show a toast and return the empty string that blanks the output box."""
    gr.Info("Chat cleared!")
    return ""


# UI: role selector and sampling controls feeding process_query, plus a
# button that clears the output box.
with gr.Blocks() as demo:
    gr.Markdown("## Zephyr Chatbot Controls")
    role_dropdown = gr.Dropdown(choices=["SDE", "BA"], label="Select Role", value="SDE")
    system_message = gr.Textbox(value="You are a friendly chatbot.", label="System message")
    # Token counts are whole numbers, so the slider moves in steps of 1.
    max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max tokens")
    # Capped at 2.0: the OpenAI chat API rejects temperatures above 2.
    temperature = gr.Slider(0.1, 2.0, value=0.7, label="Temperature", step=0.1)
    top_p = gr.Slider(0.1, 1.0, value=0.95, label="Top-p", step=0.05)
    output = gr.Textbox(label="Output", lines=20)
    submit_btn = gr.Button("Submit")
    clear_btn = gr.Button("Clear")
    submit_btn.click(
        fn=process_query,
        inputs=[role_dropdown, system_message, max_tokens, temperature, top_p],
        outputs=output,
    )
    # The handler must return exactly one value for the single output
    # component; returning a tuple would be shown in the textbox as text.
    clear_btn.click(
        fn=_clear_output,
        outputs=[output],
    )

if __name__ == "__main__":
    demo.launch()