Spaces:

VIDraft
/

ThinkFlow-llama

Running on Zero

App Files Files Community

ThinkFlow-llama / app.py

openfree

Update app.py

47cd8f1 verified 4 months ago

raw

history blame

11.1 kB

	import re
	import threading

	import gradio as gr
	import spaces
	import transformers
	from transformers import pipeline

	# Load the model and its tokenizer through a text-generation pipeline.
	model_name = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
	# Only build the pipeline when gr.NO_RELOAD is truthy.
	if gr.NO_RELOAD:
	    pipe = pipeline(
	        task="text-generation",
	        model=model_name,
	        torch_dtype="auto",
	        device_map="auto",
	    )

	# Marker used to detect the step that introduces the final answer.
	ANSWER_MARKER = "답변"

	# Sentence openers, one per reasoning step. Only the last entry contains
	# the "{question}" placeholder and ends with ANSWER_MARKER.
	rethink_prepends = [
	    "자, 이제 다음을 파악해야 합니다 ",
	    "제 생각에는 ",
	    "잠시만요, 제 생각에는 ",
	    "다음 사항이 맞는지 확인해 보겠습니다 ",
	    "또한 기억해야 할 것은 ",
	    "또 다른 주목할 점은 ",
	    "그리고 저는 다음과 같은 사실도 기억합니다 ",
	    "이제 충분히 이해했다고 생각합니다 ",
	    (
	        "지금까지의 정보를 바탕으로, 원래 질문에 사용된 언어로 답변하겠습니다:"
	        "\n{question}\n"
	        "\n" + ANSWER_MARKER + "\n"
	    ),
	]


	# Delimiter settings passed to the chatbots so formulas are rendered:
	# "$$ ... $$" as display math and "$ ... $" as inline math.
	latex_delimiters = [
	    dict(left="$$", right="$$", display=True),
	    dict(left="$", right="$", display=False),
	]


	def reformat_math(text: str) -> str:
	    r"""Rewrite MathJax delimiters into the dollar-sign form used by Gradio (KaTeX).

	    ``\[ ... \]`` becomes ``$$...$$`` and ``\( ... \)`` becomes ``$...$``.
	    Whitespace directly inside the delimiters is dropped. Formulas may span
	    several lines.

	    This is a workaround for displaying math in Gradio; the author noted that
	    no ``latex_delimiters`` setting was found that works as expected.

	    Args:
	        text: Text that may contain MathJax-delimited formulas.

	    Returns:
	        The text with both delimiter styles converted; text without such
	        delimiters is returned unchanged.
	    """
	    # Lazy ``.*?`` keeps each match to a single formula; DOTALL lets the
	    # formula body contain newlines.
	    text = re.sub(r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$", text, flags=re.DOTALL)
	    text = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", text, flags=re.DOTALL)
	    return text


	def user_input(message, history_original, history_thinking):
	    """Append the user's message to both histories and clear the input box.

	    Every occurrence of ANSWER_MARKER is removed from the message before it
	    is stored.

	    Returns:
	        A 3-tuple: an empty string, a new list made of ``history_original``
	        plus the user message, and a new list made of ``history_thinking``
	        plus the user message. The input lists are not modified.
	    """
	    cleaned = message.replace(ANSWER_MARKER, "")
	    # Each history receives its own ChatMessage instance.
	    turn_for_original = gr.ChatMessage(role="user", content=cleaned)
	    turn_for_thinking = gr.ChatMessage(role="user", content=cleaned)
	    updated_original = history_original + [turn_for_original]
	    updated_thinking = history_thinking + [turn_for_thinking]
	    return "", updated_original, updated_thinking


	def rebuild_messages(history: list):
	    """Build the message list handed to the model from a chat history.

	    Selection rules, applied per entry:
	      * a ``dict`` is kept as-is (same object) when its ``metadata`` has no
	        truthy ``"title"``; a titled ``dict`` is dropped;
	      * a ``gr.ChatMessage`` is kept only when its metadata has a truthy
	        ``"title"`` and its content is a ``str``; it is converted to a fresh
	        ``{"role", "content"}`` dict;
	      * anything else is dropped.

	    Returns:
	        A new list of message dicts in the original order.
	    """
	    rebuilt = []
	    for entry in history:
	        if isinstance(entry, dict):
	            has_title = entry.get("metadata", {}).get("title", False)
	            if not has_title:
	                rebuilt.append(entry)
	            continue
	        if (
	            isinstance(entry, gr.ChatMessage)
	            and entry.metadata.get("title")
	            and isinstance(entry.content, str)
	        ):
	            rebuilt.append({"role": entry.role, "content": entry.content})
	    return rebuilt


	@spaces.GPU
	def bot_original(
	    history: list,
	    max_num_tokens: int,
	    do_sample: bool,
	    temperature: float,
	):
	    """Let the model answer the question directly, with no reasoning steps.

	    Generator: appends an empty assistant message to ``history``, fills it
	    token by token as the pipeline streams, and yields ``history`` after
	    every token and once more after generation has finished.

	    Args:
	        history: Chat history; mutated in place.
	        max_num_tokens: Passed to the pipeline as ``max_new_tokens``.
	        do_sample: Passed through to the pipeline.
	        temperature: Passed through to the pipeline.
	    """
	    # The streamer lets this generator read tokens produced in the worker thread.
	    token_stream = transformers.TextIteratorStreamer(
	        pipe.tokenizer,  # pyright: ignore
	        skip_special_tokens=True,
	        skip_prompt=True,
	    )

	    # Prompt = the history as it stands, before the empty reply is added.
	    prompt_messages = rebuild_messages(history)

	    # Placeholder assistant message that receives the streamed answer.
	    reply = gr.ChatMessage(role="assistant", content="")
	    history.append(reply)

	    generation_kwargs = {
	        "max_new_tokens": max_num_tokens,
	        "streamer": token_stream,
	        "do_sample": do_sample,
	        "temperature": temperature,
	    }
	    worker = threading.Thread(
	        target=pipe,
	        args=(prompt_messages,),
	        kwargs=generation_kwargs,
	    )
	    worker.start()

	    for piece in token_stream:
	        # Re-run the math reformatting on the whole accumulated answer.
	        reply.content = reformat_math(reply.content + piece)
	        yield history
	    worker.join()

	    yield history


	@spaces.GPU
	def bot_thinking(
	    history: list,
	    max_num_tokens: int,
	    final_num_tokens: int,
	    do_sample: bool,
	    temperature: float,
	):
	    """Let the model answer the question after a sequence of reasoning steps.

	    For every entry of ``rethink_prepends`` the assistant turn is extended
	    with that sentence opener and the model is asked to continue it. The
	    reasoning is streamed into a "thinking" message; the step whose opener
	    contains ANSWER_MARKER closes that message and streams into a new,
	    plain assistant message holding the final answer.

	    The text generated in each step is appended to the prompt as well, so
	    every later step (and the final answer) is conditioned on the reasoning
	    produced so far, not only on the bare sentence openers.

	    Generator: yields ``history`` after every streamed token and once more
	    at the end.

	    Args:
	        history: Chat history whose last entry is the user's question as a
	            dict with a ``"content"`` key; mutated in place.
	        max_num_tokens: ``max_new_tokens`` for each reasoning step.
	        final_num_tokens: ``max_new_tokens`` for the final-answer step.
	        do_sample: Passed through to the pipeline.
	        temperature: Passed through to the pipeline.
	    """
	    # The streamer lets this generator read tokens produced in the worker thread.
	    streamer = transformers.TextIteratorStreamer(
	        pipe.tokenizer,  # pyright: ignore
	        skip_special_tokens=True,
	        skip_prompt=True,
	    )

	    # Kept so the question can be re-inserted into the final prepend.
	    question = history[-1]["content"]

	    # Placeholder assistant message that displays the reasoning.
	    history.append(
	        gr.ChatMessage(
	            role="assistant",
	            content=str(""),
	            metadata={"title": "🧠 생각 중...", "status": "pending"},
	        )
	    )

	    # messages[-1] is the assistant turn that the model keeps continuing.
	    messages = rebuild_messages(history)
	    for i, prepend in enumerate(rethink_prepends):
	        step_prefix = prepend.format(question=question)
	        is_final_step = ANSWER_MARKER in prepend

	        if i > 0:
	            messages[-1]["content"] += "\n\n"
	        messages[-1]["content"] += step_prefix

	        num_tokens = int(final_num_tokens if is_final_step else max_num_tokens)
	        t = threading.Thread(
	            target=pipe,
	            args=(messages,),
	            kwargs=dict(
	                max_new_tokens=num_tokens,
	                streamer=streamer,
	                do_sample=do_sample,
	                temperature=temperature,
	            ),
	        )
	        t.start()

	        # Mirror the new prefix in the displayed history.
	        history[-1].content += step_prefix
	        if is_final_step:
	            history[-1].metadata = {"title": "💭 사고 과정", "status": "done"}
	            # Thinking is over; the answer goes into a fresh message without
	            # metadata.
	            history.append(gr.ChatMessage(role="assistant", content=""))

	        step_tokens = []
	        for token in streamer:
	            step_tokens.append(token)
	            history[-1].content += token
	            history[-1].content = reformat_math(history[-1].content)
	            yield history
	        t.join()

	        # Feed the raw (not math-reformatted) output of this step back into
	        # the prompt so that the next step continues from it.
	        messages[-1]["content"] += "".join(step_tokens)

	    yield history


	with gr.Blocks(fill_height=True, title="Vidraft ThinkFlow") as demo:
	    # Title and description
	    gr.Markdown("# Vidraft ThinkFlow")
	    gr.Markdown("### 추론 기능이 없는 LLM 모델의 수정 없이도 추론 기능을 자동으로 적용하는 LLM 추론 생성 플랫폼")

	    # Collapsed section holding a sample question
	    with gr.Accordion("EXAMPLES", open=False):
	        gr.Markdown("""
	국가직 7급 시험 문제: 다음 글을 통해 추론한 것으로 적절하지 않은 것은?

	로컬푸드(local food)는 일차적으로 일정한 지역을 기준으로 해당 지역에서 생산되는 농식품을 의미한다. 로컬푸드를 물리적 거리로써 구체적으로 규정하는 경우 좁게는 반경 50 km, 넓게는 반경 100 km의 농촌 지역 내에서 생산되는 농식품을 지칭하곤 한다. 그렇다고 해서 로컬푸드가 이 정도의 물리적 거리나 농촌을 중심으로 한 지역사회의 농식품에 국한되는 것은 아니다. 일본은 행정구역을 중심으로 로컬푸드를 규정하는 경향이 있고, 미국의 경우 넓게는 반경 160 km 정도 내에서 생산되는 농식품으로까지 확대하기도 한다. 이는 생산․유통․소비에 있어서 건강성, 신뢰성, 친환경성 등이 유지될 수 있는 거리를 고려한 것이다.로컬푸드가 일정한 거리 이내에서 생산된 농식품을 의미하는 것이라면, 로컬푸드 운동은 친환경적이고 자립적이며 지속 가능한 먹거리를 생산․유통․소비하고자 하는 공동체적 노력을 일컫는다. 농업의 해체와 식품 안전성의 위기가 만나는 접점은 로컬푸드 운동이 발아하는 배경이 된다. 전통적인 농업은 관련 인구 감소, 농촌 경제 영세화, '종자에서 식탁까지' 지배하는 거대자본의 위협을 받고 있다. 농약의 과다 사용으로 인해 식품은 물론 자연환경이 위기에 처하게 되었다. 이러한 문제점에 대응하기 위해 친환경 먹거리 생산과 건강한 소비를 연결하고, 나아가 지역 정체성을 강화하는 등 대안적 공동체 운동으로 선순환시키려는 노력이 로컬푸드 운동으로 나타났다.

	① 로컬푸드의 범위는 경제적 요소를 고려해서 규정될 수 있다.
	② 식품 안전성에 주목하는 로컬푸드 운동은 환경보호 운동과도 밀접한 관련을 지닌다고 볼 수 있다.
	③ 지역적 정체성을 드러내는 하나의 전략으로 해당 지역에서 산출되는 로컬푸드를 활용할 수 있다.
	④ 지역 농가가 거대자본에 의존하여 생산과 소비를 연결하려는 시도는 로컬푸드 운동의 일환일 수 있다.
	""")

	    # Two chat panels side by side: plain answer vs. answer with reasoning
	    with gr.Row(scale=1):
	        with gr.Column(scale=2):
	            gr.Markdown("## Before (Original)")
	            chatbot_original = gr.Chatbot(
	                label="Original Model (No Reasoning)",
	                type="messages",
	                latex_delimiters=latex_delimiters,
	                scale=1,
	            )

	        with gr.Column(scale=2):
	            gr.Markdown("## After (Thinking)")
	            chatbot_thinking = gr.Chatbot(
	                label="Model with Reasoning",
	                type="messages",
	                latex_delimiters=latex_delimiters,
	                scale=1,
	            )

	    # Question input box
	    with gr.Row():
	        msg = gr.Textbox(
	            label="",
	            show_label=False,
	            placeholder="여기에 질문을 입력하세요.",
	            autofocus=True,
	            submit_btn=True,
	        )

	    # Generation parameters
	    with gr.Row():
	        with gr.Column():
	            gr.Markdown("## 매개변수 조정")
	            num_tokens = gr.Slider(
	                minimum=50,
	                maximum=4000,
	                value=2000,
	                step=1,
	                label="추론 단계당 최대 토큰 수",
	                interactive=True,
	            )
	            final_num_tokens = gr.Slider(
	                minimum=50,
	                maximum=4000,
	                value=2000,
	                step=1,
	                label="최종 답변의 최대 토큰 수",
	                interactive=True,
	            )
	            do_sample = gr.Checkbox(value=True, label="샘플링 사용")
	            temperature = gr.Slider(
	                minimum=0.1,
	                maximum=1.0,
	                value=0.7,
	                step=0.1,
	                label="온도",
	            )

	    # On submit: store the user message in both chats, then run the plain
	    # bot, then the reasoning bot. Each bot writes its new history back to
	    # its own chatbot.
	    chat_and_input = [msg, chatbot_original, chatbot_thinking]
	    original_inputs = [chatbot_original, num_tokens, do_sample, temperature]
	    thinking_inputs = [
	        chatbot_thinking,
	        num_tokens,
	        final_num_tokens,
	        do_sample,
	        temperature,
	    ]

	    submit_event = msg.submit(
	        fn=user_input,
	        inputs=chat_and_input,
	        outputs=chat_and_input,
	    )
	    original_event = submit_event.then(
	        fn=bot_original,
	        inputs=original_inputs,
	        outputs=chatbot_original,
	    )
	    original_event.then(
	        fn=bot_thinking,
	        inputs=thinking_inputs,
	        outputs=chatbot_thinking,
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    queued_demo = demo.queue()
	    queued_demo.launch()