VideoConverter / app.py
MegaTronX's picture
Update app.py
5379aa1 verified
raw
history blame
6.37 kB
import gradio as gr
from datasets import load_dataset
import subprocess
import os
import tempfile
import urllib.request
from pathlib import Path
def _find_file_url(dataset, file_name):
    """Return the first string field whose basename equals ``file_name``.

    Walks every split of ``dataset`` in order. Within each example the
    ``"file"`` column is checked first, then every other value. Only string
    values are considered.

    Args:
        dataset: Mapping of split name to an iterable of example dicts, as
            returned by ``load_dataset`` when no split is requested.
        file_name (str): Bare filename to look for (no directory part).

    Returns:
        str | None: The matching value, or ``None`` if nothing matched.
    """
    for split_data in dataset.values():
        for example in split_data:
            if not isinstance(example, dict):
                continue
            # Check the conventional "file" column before the other columns.
            candidates = [example.get("file"), *example.values()]
            for value in candidates:
                if isinstance(value, str) and os.path.basename(value) == file_name:
                    return value
    return None


def _remove_quietly(path):
    """Delete ``path`` if it exists; cleanup is best-effort by design."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # A leftover temp file must never mask the real result or error.
        pass


def convert_ts_to_mp4(dataset_name, file_name, hf_token):
    """
    Download a .ts video referenced by a Hugging Face dataset and convert
    it to .mp4 with ffmpeg.

    Args:
        dataset_name (str): The name of the Hugging Face dataset.
        file_name (str): The name of the .ts file within the dataset.
            It should be just the filename, not the full path.
        hf_token (str): The Hugging Face token. If None or empty, the
            ``HF_TOKEN`` environment variable is used when set; otherwise
            the dataset is assumed to be public.

    Returns:
        str: The path to the converted .mp4 file. The file is created in
            the system temp directory and is NOT deleted by this function.

    Raises:
        gr.Error: On any failure (missing input, dataset cannot be loaded,
            file not found, download failure, ffmpeg failure). The output
            component is a ``gr.File``, so a returned error string would be
            treated as a file path; raising lets the UI show the message.
    """
    dataset_name = (dataset_name or "").strip()
    file_name = (file_name or "").strip()
    if not dataset_name or not file_name:
        raise gr.Error("Please provide both a dataset name and a .ts file name.")

    # The UI advertises the HF_TOKEN secret as an alternative to the textbox.
    token = (hf_token or "").strip() or os.environ.get("HF_TOKEN")

    # 1. Load the dataset (streaming, so nothing is downloaded up front) and
    # 2. search it for the requested filename.
    load_kwargs = {"streaming": True}
    if token:
        # `token` is the current keyword; `use_auth_token` was deprecated.
        load_kwargs["token"] = token
    try:
        dataset = load_dataset(dataset_name, **load_kwargs)
        file_url = _find_file_url(dataset, file_name)
    except Exception as e:
        # Top-level UI boundary: surface any loader/iteration failure.
        raise gr.Error(f"An error occurred: {e}") from e

    if not file_url:
        raise gr.Error("Error: File not found in the dataset.")
    print(file_url)  # Console breadcrumb for debugging in Spaces

    ts_path = None
    mp4_path = None
    converted = False
    try:
        # Create both temp files closed, so urlretrieve and ffmpeg can open
        # them by name on every platform.
        ts_fd, ts_path = tempfile.mkstemp(suffix=".ts")
        os.close(ts_fd)
        mp4_fd, mp4_path = tempfile.mkstemp(suffix=".mp4")
        os.close(mp4_fd)

        # 3. Download the .ts file to the temporary location.
        try:
            urllib.request.urlretrieve(file_url, ts_path)
        except Exception as e:
            raise gr.Error(f"Error downloading file: {e}") from e

        # 4. Convert the .ts file to .mp4 using ffmpeg.
        try:
            subprocess.run(
                [
                    "ffmpeg",
                    "-i",
                    ts_path,
                    "-c:v",
                    "libx264",  # H.264 video
                    "-c:a",
                    "aac",  # AAC audio
                    "-y",  # Overwrite the (empty) placeholder output file
                    mp4_path,
                ],
                check=True,  # Raise on non-zero exit code
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except FileNotFoundError as e:
            raise gr.Error(
                "FFmpeg conversion failed: ffmpeg executable not found."
            ) from e
        except subprocess.CalledProcessError as e:
            # ffmpeg output is not guaranteed to be valid UTF-8.
            stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
            error_message = f"FFmpeg conversion failed: {stderr_text}"
            print(error_message)  # Print to console for debugging in Spaces
            raise gr.Error(error_message) from e

        # 5. Hand the .mp4 path to the caller.
        converted = True
        return mp4_path
    finally:
        # The source .ts is never needed afterwards; the .mp4 is kept only
        # when conversion succeeded.
        _remove_quietly(ts_path)
        if not converted:
            _remove_quietly(mp4_path)
def gradio_interface():
    """
    Build the Gradio UI: three text inputs (dataset name, .ts file name,
    optional token) wired to ``convert_ts_to_mp4``, with a downloadable
    file as the single output.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    dataset_box = gr.Textbox(
        label="Hugging Face Dataset Name",
        placeholder="e.g., 'PolyAI/minds-14' or 'my-org/my-private-dataset'",
    )
    file_box = gr.Textbox(
        label="TS File Name (within the dataset)",
        placeholder="e.g., 'file_name.ts'",
    )
    # type="password" keeps the token masked in the browser.
    token_box = gr.Textbox(
        label="Hugging Face Token (for private datasets)",
        placeholder="(Optional) Enter your Hugging Face token here, or set it as HF_TOKEN in Space settings",
        type="password",
    )
    # gr.File renders the returned path as a download link.
    download = gr.File(label="Converted MP4 File")

    blurb = (
        "Convert .ts video files from Hugging Face datasets to .mp4 format. "
        "Provide the dataset name and the name of the .ts file. The converted "
        ".mp4 file will be available for download. "
        "For private datasets, you *must* provide a Hugging Face token, either directly in the input box, or, preferably, by setting the `HF_TOKEN` secret in your Space's settings."
    )
    # Step-by-step usage notes shown below the form.
    usage_notes = """
Example Usage:
1. For a public dataset like 'PolyAI/minds-14' and the file 'audio/en/common_voice_en_7722.ts',
enter 'PolyAI/minds-14' in the "Hugging Face Dataset Name" field and
'common_voice_en_7722.ts' in the "TS File Name" field. Leave the "Hugging Face Token" field empty.
2. For a private dataset, enter the dataset name (e.g., 'my-org/my-private-dataset')
and the .ts file name. Enter your Hugging Face token in the "Hugging Face Token" field
*or*, preferably, add your token as a secret named `HF_TOKEN` in your Space's settings.
3. Click the 'Submit' button.
4. The converted .mp4 file will be processed, and a download link will be provided.
"""

    interface = gr.Interface(
        fn=convert_ts_to_mp4,
        inputs=[dataset_box, file_box, token_box],
        outputs=download,
        title="TS to MP4 Converter",
        description=blurb,
        article=usage_notes,
    )
    return interface
if __name__ == "__main__":
    # Script entry point: build the interface, then start the Gradio server.
    app = gradio_interface()
    app.launch()