# PDF-RAG / app.py
import os
from datetime import datetime
from typing import List

from chainlit.types import AskFileResponse
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
from aimakerspace.openai_utils.prompts import (
    UserRolePrompt,
    SystemRolePrompt,
    AssistantRolePrompt,
)
from aimakerspace.openai_utils.embedding import EmbeddingModel
from aimakerspace.vectordatabase import VectorDatabase
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
import chainlit as cl
from chainlit import user_session
from chainlit.element import Text
system_template = """\
Use the following context to answer a user's question. If you cannot find the answer in the context, say you don't know the answer."""
system_role_prompt = SystemRolePrompt(system_template)

user_prompt_template = """\
Context:
{context}
Question:
{question}
"""
user_role_prompt = UserRolePrompt(user_prompt_template)
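
# For illustration (assuming UserRolePrompt performs str.format-style
# substitution), user_role_prompt.create_message(
#     context="Paris is the capital of France.",
#     question="What is the capital of France?")
# should yield a user message whose content reads:
#
#   Context:
#   Paris is the capital of France.
#   Question:
#   What is the capital of France?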

async def init_sidebar():
    """Render the upload sidebar. Called explicitly from on_chat_start below,
    since Chainlit keeps only one on_chat_start handler per app."""
    # Decorate the sidebar header
    await cl.Sidebar(
        cl.Text(content="πŸ“ **File Upload Section**", style="heading3"),
        cl.FilePicker(
            accept=[".pdf", ".txt"],
            max_size_mb=2,
            on_upload=handle_upload,
            label="πŸ“€ Upload PDF/TXT",
            description="Only files up to 2MB can be uploaded"
        ),
        cl.Separator(),
        cl.Text(content="πŸ” **Document Analysis Status**", style="heading4"),
        cl.ProgressRing(id="progress", visible=False),
        cl.Text(id="status", content="Waiting...", style="caption"),
        title="πŸ“š Document Q&A System",
        persistent=True  # πŸ‘ˆ keep the sidebar pinned
    ).send()

async def handle_upload(file: AskFileResponse):
    # Update the sidebar progress indicators (assumes the elements registered
    # with id="status" / id="progress" above are retrievable from the session)
    status = user_session.get("status")
    progress = user_session.get("progress")
    await status.update(content=f"πŸ” Analyzing {file.name}...")
    await progress.update(visible=True)

    try:
        # File processing logic
        texts = process_file(file)

        # Build the vector DB
        vector_db = VectorDatabase()
        vector_db = await vector_db.abuild_from_list(texts)

        # Store in the session
        user_session.set("vector_db", vector_db)

        # Update status
        await status.update(content=f"βœ… Processed {len(texts)} chunks!")
        await progress.update(visible=False)

        # Show a summary of the uploaded file
        await cl.Accordion(
            title="πŸ“„ Uploaded Document Info",
            content=[
                cl.Text(f"File name: {file.name}"),
                cl.Text(f"Size: {file.size / 1024:.1f} KB"),
                cl.Text(f"Analyzed at: {datetime.now().strftime('%H:%M:%S')}")
            ],
            expanded=False
        ).send()
    except Exception as e:
        await cl.Error(
            title="File processing error",
            content=str(e)
        ).send()

class RetrievalAugmentedQAPipeline:
    def __init__(self, llm: ChatOpenAI, vector_db_retriever: VectorDatabase) -> None:
        self.llm = llm
        self.vector_db_retriever = vector_db_retriever

    async def arun_pipeline(self, user_query: str):
        # Retrieve the top-k most similar chunks for the query
        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)

        # Concatenate the retrieved chunk texts into a single context block
        context_prompt = ""
        for context in context_list:
            context_prompt += context[0] + "\n"

        formatted_system_prompt = system_role_prompt.create_message()
        formatted_user_prompt = user_role_prompt.create_message(question=user_query, context=context_prompt)

        # Wrap the LLM call in an async generator so tokens can be streamed
        async def generate_response():
            async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
                yield chunk

        return {"response": generate_response(), "context": context_list}
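
# A minimal consumption sketch (hypothetical names, assuming an already-built
# VectorDatabase `db`): the pipeline returns the token stream plus the raw
# (text, score) tuples so callers can render sources alongside the answer.
#
#   pipeline = RetrievalAugmentedQAPipeline(llm=ChatOpenAI(), vector_db_retriever=db)
#   result = await pipeline.arun_pipeline("What is this document about?")
#   async for token in result["response"]:
#       print(token, end="")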

text_splitter = CharacterTextSplitter()


def process_file(file: AskFileResponse):
    import tempfile
    import shutil

    print(f"Processing file: {file.name}")

    # Create a temporary file with the correct extension
    suffix = f".{file.name.split('.')[-1]}"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        # Copy the uploaded file content to the temporary file
        shutil.copyfile(file.path, temp_file.name)
        print(f"Created temporary file at: {temp_file.name}")

    # Create the appropriate loader
    if file.name.lower().endswith('.pdf'):
        loader = PDFLoader(temp_file.name)
    else:
        loader = TextFileLoader(temp_file.name)

    try:
        # Load and process the documents
        documents = loader.load_documents()
        texts = text_splitter.split_texts(documents)
        return texts
    finally:
        # Clean up the temporary file
        try:
            os.unlink(temp_file.name)
        except Exception as e:
            print(f"Error cleaning up temporary file: {e}")

@cl.on_chat_start
async def on_chat_start():
    # Render the sidebar UI first
    await init_sidebar()

    files = None

    # Wait for the user to upload a file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a Text or PDF file to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=2,
            timeout=180,
        ).send()

    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Load and split the file
    texts = process_file(file)
    print(f"Processing {len(texts)} text chunks")

    # Create a dict vector store
    vector_db = VectorDatabase()
    vector_db = await vector_db.abuild_from_list(texts)

    chat_openai = ChatOpenAI()

    # Create a chain
    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
        vector_db_retriever=vector_db,
        llm=chat_openai
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", retrieval_augmented_qa_pipeline)

@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")

    # Run retrieval + generation for the user's query
    result = await chain.arun_pipeline(message.content)

    # Improved response styling
    msg = cl.Message(
        content="",
        actions=[
            cl.Action(name="source", value="πŸ“‘ View sources"),
            cl.Action(name="feedback", value="πŸ’¬ Leave feedback")
        ]
    )

    # Stream tokens into the message as they arrive
    async for token in result["response"]:
        await msg.stream_token(token)

    # Format the final message
    final_content = f"""
🧠 **AI Analysis Result**
{msg.content}
πŸ“Œ Referenced passages:
{chr(10).join([f'- {ctx[0][:50]}...' for ctx in result['context']])}
"""
    msg.content = final_content
    await msg.update()
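
# The "source" and "feedback" actions above need matching handlers to respond
# to clicks; a minimal sketch (an assumed addition, not part of the original
# app) using Chainlit's action_callback decorator:
#
# @cl.action_callback("source")
# async def on_source_action(action: cl.Action):
#     await cl.Message(content=f"Action selected: {action.value}").send()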