# badassgi / app.py
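"""Gradio Space that transcribes audio with Whisper and summarizes the transcript with Command R+."""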
import spaces
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
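# pipeline() takes a CUDA device index (0 = first GPU) or the string "cpu"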
device = 0 if torch.cuda.is_available() else "cpu"
# Initialize the Whisper ASR pipeline
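# chunk_length_s=30 turns on chunked long-form decoding: audio longer than
# 30 s is split into windows that are transcribed in batches and re-joined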
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
# Set up the Hugging Face Inference client for summarization
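# Assumes an HF_TOKEN secret is configured for this Space; without it, calls
# to the hosted model may be rejected or heavily rate-limited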
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.getenv("HF_TOKEN")
)
@spaces.GPU
def transcribe_summarize(audio_input, task):
    if audio_input is None:
        raise gr.Error("μ˜€λ””μ˜€ 파일이 μ œμΆœλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€!")

    # Convert speech to text
    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True
    )
    transcribed_text = result["text"]

    # Summarize the transcript
    try:
        # Build the summarization prompt
        prompt = f"""μ•„λž˜ ν…μŠ€νŠΈλ₯Ό κ°„λ‹¨νžˆ μš”μ•½ν•΄μ£Όμ„Έμš”:
ν…μŠ€νŠΈ: {transcribed_text}
μš”μ•½:"""

        # Call the inference API (the model was pinned when the client was created)
        response = hf_client.text_generation(
            prompt=prompt,
            max_new_tokens=150,
            temperature=0.3,
            top_p=0.9,
            repetition_penalty=1.2,
            stop_sequences=["\n", "ν…μŠ€νŠΈ:", "μš”μ•½:"]
        )

        # text_generation returns a plain string unless details/streaming are requested
        if isinstance(response, str):
            summary_text = response
        else:
            summary_text = response.generated_text if hasattr(response, 'generated_text') else str(response)

        # Strip any echoed prompt prefix
        if "μš”μ•½:" in summary_text:
            summary_text = summary_text.split("μš”μ•½:")[1].strip()
        if not summary_text:
            summary_text = "μš”μ•½μ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€."
    except Exception as e:
        print(f"Error while generating summary: {str(e)}")  # debug log
        summary_text = "μš”μ•½μ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”."

    print(f"Transcribed text: {transcribed_text}")  # debug log
    print(f"Generated summary: {summary_text}")  # debug log
    return [transcribed_text, summary_text]
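# Minimal local check (assuming a sample.wav exists next to this file):
#   text, summary = transcribe_summarize("sample.wav", "transcribe")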
# CSS styling: hide the default Gradio footer
css = """
footer { visibility: hidden; }
"""
# File-upload interface
file_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="μ˜€λ””μ˜€ 파일"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="μž‘μ—…",
            value="transcribe"
        ),
    ],
    outputs=[
        gr.Textbox(label="λ³€ν™˜λœ ν…μŠ€νŠΈ", lines=5),
        gr.Textbox(label="μš”μ•½", lines=3)
    ],
    title="λ°›μ•„μ“°κΈ° AI: μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ³  μš”μ•½ν•˜κΈ°",
    flagging_mode="never"
)
# Microphone-recording interface
mic_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="μž‘μ—…",
            value="transcribe"
        ),
    ],
    outputs=[
        gr.Textbox(label="λ³€ν™˜λœ ν…μŠ€νŠΈ", lines=5),
        gr.Textbox(label="μš”μ•½", lines=3)
    ],
    title="λ°›μ•„μ“°κΈ° AI: μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ³  μš”μ•½ν•˜κΈ°",
    flagging_mode="never",
    css=css
)
# Main application: the two interfaces as tabs
demo = gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["μ˜€λ””μ˜€ 파일", "마이크"]
    )
# Run the application
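# queue() enables request queuing (commonly required for GPU-backed Spaces);
# ssr_mode=False disables Gradio's server-side rendering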
demo.queue().launch(ssr_mode=False)