Spaces:

leetuan023
/

videoocr2

Configuration error

App Files Files Community

videoocr2 / app.py

leetuan023

Create app.py

a23edc5 verified 10 months ago

raw

history blame contribute delete

4.03 kB

	import cv2
	import pytesseract
	import gradio as gr
	from datetime import timedelta
	import os
	import tempfile

	# Đặt đường dẫn Tesseract nếu cần thiết (cho Windows)
	# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

	# Hàm nhận diện văn bản từ khung hình với Tesseract OCR
	def tesseract_ocr(frame, lang='vie'):
	    """
	    Recognize the text shown in a single video frame with Tesseract OCR.

	    Args:
	        frame: Image in OpenCV BGR channel order (it is converted with
	            ``cv2.COLOR_BGR2GRAY``), e.g. a frame read from
	            ``cv2.VideoCapture``.
	        lang: Tesseract language code(s) passed to
	            ``pytesseract.image_to_string``. Defaults to ``'vie'``
	            (Vietnamese), the value that was previously hard-coded.

	    Returns:
	        The recognized text with leading and trailing whitespace removed.
	"""
	    # OCR is run on a grayscale version of the frame.
	    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	    text = pytesseract.image_to_string(gray_frame, lang=lang)
	    return text.strip()

	def extract_frames(video_path, interval=1):
	    """
	    Sample frames from a video at a fixed time interval.

	    Args:
	        video_path: Path of the video, passed to ``cv2.VideoCapture``.
	        interval: Seconds between two sampled frames (default 1). An
	            interval shorter than one frame duration samples every frame.

	    Returns:
	        A list of ``(frame, timestamp)`` tuples where ``timestamp`` is the
	        frame index divided by the reported frame rate, in seconds. The
	        list is empty when no frame could be read. All sampled frames are
	        kept in memory at once.

	    Raises:
	        ValueError: If frames can be read but the reported frame rate is
	            not a positive number, so timestamps cannot be computed.
	    """
	    video_capture = cv2.VideoCapture(video_path)
	    frames = []
	    try:
	        fps = video_capture.get(cv2.CAP_PROP_FPS)  # frames per second
	        fps_is_valid = fps > 0  # False for 0 and for NaN
	        # Clamp to 1 so that a small interval can never produce a modulo
	        # by zero below.
	        step = max(1, int(fps * interval)) if fps_is_valid else 1
	        count = 0

	        while True:
	            success, frame = video_capture.read()
	            if not success:
	                break

	            if not fps_is_valid:
	                raise ValueError(
	                    f"Cannot determine the frame rate of {video_path!r}"
	                )

	            if count % step == 0:
	                # Keep the frame together with its position in the video.
	                frames.append((frame, count / fps))

	            count += 1
	    finally:
	        # Release the capture even if reading raised.
	        video_capture.release()

	    return frames

	# Hàm chuyển đổi thời gian chính xác
	def convert_time(seconds):
	    """
	    Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

	    The value is rounded to the nearest millisecond and split with integer
	    arithmetic, so hours are always zero-padded (and keep counting past 24
	    instead of switching to a "1 day, ..." form) and fractions such as 2.3
	    are not truncated to 299 ms by floating-point error.

	    Args:
	        seconds: Offset from the start of the video, in seconds (int or
	            float). Negative values are clamped to zero because SRT cannot
	            represent them.

	    Returns:
	        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
	    """
	    total_ms = max(0, int(round(float(seconds) * 1000)))
	    hours, remainder = divmod(total_ms, 3_600_000)
	    minutes, remainder = divmod(remainder, 60_000)
	    secs, millis = divmod(remainder, 1000)
	    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

	def create_srt(transcriptions, srt_file_path):
	    """
	    Write subtitle cues to an SRT file.

	    Args:
	        transcriptions: Iterable of ``(text, start_time, end_time)`` tuples
	            with both times in seconds.
	        srt_file_path: Path of the file to create or overwrite; it is
	            written as UTF-8.

	    Blank lines inside a cue's text are dropped because a blank line ends
	    a cue in the SRT format. Cues whose text is empty after that are
	    skipped, and the cues that are written are numbered consecutively
	    from 1.
	    """
	    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
	        cue_number = 0
	        for text, start_time, end_time in transcriptions:
	            text_lines = [line for line in text.splitlines() if line.strip()]
	            if not text_lines:
	                continue

	            cue_number += 1
	            srt_file.write(f"{cue_number}\n")
	            srt_file.write(f"{convert_time(start_time)} --> {convert_time(end_time)}\n")
	            srt_file.write("\n".join(text_lines) + "\n\n")

	def process_video(video_path, frame_interval=1):
	    """
	    Run OCR over a video and write the recognized text to an SRT file.

	    Frames are sampled every ``frame_interval`` seconds. Consecutive
	    sampled frames with identical text are merged into one cue that ends
	    when the text changes. Frames with no recognized text end the current
	    cue but do not create a cue themselves.

	    Args:
	        video_path: Path of the uploaded video.
	        frame_interval: Seconds between sampled frames (default 1).

	    Returns:
	        Path of a temporary ``.srt`` file. The file is created with
	        ``delete=False``, so it is not removed automatically.

	    Raises:
	        gr.Error: If no video was provided.
	    """
	    if not video_path:
	        raise gr.Error("Please upload a video before processing.")

	    frames = extract_frames(video_path, interval=frame_interval)
	    transcriptions = []
	    open_text = None   # text seen on the most recent sampled frame
	    open_start = None  # timestamp at which that text first appeared

	    for frame, timestamp in frames:
	        current_text = tesseract_ocr(frame)

	        # Same text as the previous sample: the open cue keeps running.
	        if current_text == open_text:
	            continue

	        # The text changed, so the open cue (if it has text) ends here.
	        if open_text:
	            transcriptions.append((open_text, open_start, timestamp))

	        open_text = current_text
	        open_start = timestamp

	    # The last cue stays visible for one more sampling interval after the
	    # last sampled frame, so it never collapses to zero duration.
	    if open_text:
	        last_timestamp = frames[-1][1]
	        transcriptions.append(
	            (open_text, open_start, last_timestamp + frame_interval)
	        )

	    # Reserve a temp path and close the handle before create_srt reopens
	    # the file by name.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_srt_file:
	        srt_output_path = temp_srt_file.name
	    create_srt(transcriptions, srt_output_path)

	    return srt_output_path

	# Tạo giao diện với Gradio
	# Gradio UI: one uploaded video in, one downloadable .srt file out.
	interface = gr.Interface(
	    title="Video to SRT with Tesseract OCR",
	    description="Tải video lên để chuyển thành file SRT sử dụng Tesseract OCR.",
	    fn=process_video,  # turns the uploaded video into an SRT file path
	    inputs=gr.Video(),
	    outputs=gr.File(label="Download .srt file"),
	)

	# Start the app.
	interface.launch()