Spaces:
Paused
Paused
File size: 6,367 Bytes
c2e8a2e 5379aa1 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e 5379aa1 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e be74542 c2e8a2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import gradio as gr
from datasets import load_dataset
import subprocess
import os
import tempfile
import urllib.request
from pathlib import Path
def _find_file_url(dataset, file_name):
    """Return the first string field whose basename equals ``file_name``, or None.

    Returning directly on the first hit stops iteration of the (streaming)
    dataset immediately, instead of relying on a chain of ``break`` statements.
    """
    for examples in dataset.values():
        for example in examples:
            if not isinstance(example, dict):
                continue
            # Prefer the conventional "file" column when it matches.
            preferred = example.get("file")
            if isinstance(preferred, str) and os.path.basename(preferred) == file_name:
                return preferred
            # Otherwise accept any string field that ends in the wanted name.
            for value in example.values():
                if isinstance(value, str) and os.path.basename(value) == file_name:
                    return value
    return None


def _remove_quietly(path):
    """Best-effort removal of ``path``; filesystem errors are ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def convert_ts_to_mp4(dataset_name, file_name, hf_token):
    """
    Locate a .ts video referenced by a Hugging Face dataset, download it,
    and transcode it to .mp4 (H.264 video / AAC audio) with ffmpeg.

    Args:
        dataset_name (str): The name of the Hugging Face dataset.
        file_name (str): The name of the .ts file within the dataset. Only
            the final path component is used, so a full path is tolerated.
        hf_token (str): The Hugging Face token used to load the dataset.
            If None or empty, no token is passed explicitly.

    Returns:
        str: The path to the converted .mp4 file on success. On failure a
        human-readable error message is returned instead; this function
        does not raise.

    NOTE(review): the download step sends no Authorization header, so a
    file that itself requires authentication will presumably fail to
    download even when ``hf_token`` is given -- confirm against a private
    dataset. Callers that feed the result to a file widget should also be
    aware that error messages come back through the same return value.
    """
    try:
        # Normalise the requested name; an empty name would otherwise match
        # unrelated empty-string fields in the dataset.
        wanted = os.path.basename((file_name or "").strip())
        if not wanted:
            return "Error: No file name provided."

        # 1. Load the dataset. ``token`` is the current spelling of the
        #    deprecated ``use_auth_token`` keyword in the datasets library.
        token = (hf_token or "").strip()
        if token:
            dataset = load_dataset(dataset_name, token=token, streaming=True)
        else:
            dataset = load_dataset(dataset_name, streaming=True)

        # 2. Find the file. This assumes the filename is unique within the
        #    dataset and that the matching field is directly downloadable.
        file_url = _find_file_url(dataset, wanted)
        if not file_url:
            return "Error: File not found in the dataset."
        print(file_url)  # Visible in the Space logs for debugging

        # 3. Download the .ts file into a scratch directory that is removed
        #    automatically once the conversion is finished.
        with tempfile.TemporaryDirectory() as work_dir:
            ts_path = os.path.join(work_dir, "input.ts")
            try:
                urllib.request.urlretrieve(file_url, ts_path)
            except Exception as e:
                return f"Error downloading file: {e}"

            # 4. Convert to .mp4. The output must outlive this function, so
            #    it is created with mkstemp and the handle closed before
            #    ffmpeg writes to the path.
            fd, mp4_path = tempfile.mkstemp(suffix=".mp4")
            os.close(fd)
            try:
                subprocess.run(
                    [
                        "ffmpeg",
                        "-i",
                        ts_path,
                        "-c:v",
                        "libx264",  # H.264 video
                        "-c:a",
                        "aac",  # AAC audio
                        "-y",  # Overwrite the (empty) placeholder output
                        mp4_path,
                    ],
                    check=True,  # Raise an exception on non-zero exit code
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
            except subprocess.CalledProcessError as e:
                # ffmpeg failed: drop the unusable output and report stderr.
                _remove_quietly(mp4_path)
                stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
                error_message = f"FFmpeg conversion failed: {stderr_text}"
                print(error_message)  # Print to console for debugging in Spaces
                return error_message
            except Exception:
                # e.g. ffmpeg is not installed; do not leak the placeholder.
                _remove_quietly(mp4_path)
                raise

        # 5. Return the path to the .mp4 file
        return mp4_path
    except Exception as e:
        return f"An error occurred: {e}"
def gradio_interface():
    """
    Build the Gradio UI that fronts ``convert_ts_to_mp4``.

    Three text inputs (dataset name, .ts file name, masked token) are wired
    to the converter, and its return value is routed to a file component.

    Returns:
        The assembled ``gr.Interface``; the caller is responsible for
        launching it.
    """
    # Components are created in the order they appear on the page.
    dataset_box = gr.Textbox(
        label="Hugging Face Dataset Name",
        placeholder="e.g., 'PolyAI/minds-14' or 'my-org/my-private-dataset'",
    )
    ts_name_box = gr.Textbox(
        label="TS File Name (within the dataset)",
        placeholder="e.g., 'file_name.ts'",
    )
    token_box = gr.Textbox(
        label="Hugging Face Token (for private datasets)",
        placeholder="(Optional) Enter your Hugging Face token here, or set it as HF_TOKEN in Space settings",
        type="password",  # Masked so the token is not shown on screen
    )
    download_slot = gr.File(label="Converted MP4 File")  # Downloadable result

    blurb = (
        "Convert .ts video files from Hugging Face datasets to .mp4 format. "
        "Provide the dataset name and the name of the .ts file. The converted "
        ".mp4 file will be available for download. "
        "For private datasets, you *must* provide a Hugging Face token, either directly in the input box, or, preferably, by setting the `HF_TOKEN` secret in your Space's settings."
    )

    # Step-by-step usage notes passed as the interface's ``article``.
    usage_notes = """
Example Usage:
1. For a public dataset like 'PolyAI/minds-14' and the file 'audio/en/common_voice_en_7722.ts',
enter 'PolyAI/minds-14' in the "Hugging Face Dataset Name" field and
'common_voice_en_7722.ts' in the "TS File Name" field. Leave the "Hugging Face Token" field empty.
2. For a private dataset, enter the dataset name (e.g., 'my-org/my-private-dataset')
and the .ts file name. Enter your Hugging Face token in the "Hugging Face Token" field
*or*, preferably, add your token as a secret named `HF_TOKEN` in your Space's settings.
3. Click the 'Submit' button.
4. The converted .mp4 file will be processed, and a download link will be provided.
"""

    return gr.Interface(
        fn=convert_ts_to_mp4,
        inputs=[dataset_box, ts_name_box, token_box],
        outputs=download_slot,
        title="TS to MP4 Converter",
        description=blurb,
        article=usage_notes,
    )
# Script entry point: build the interface and start serving it.
if __name__ == "__main__":
    app = gradio_interface()
    app.launch()
|