MegaTronX committed on
Commit
c2e8a2e
·
verified ·
1 Parent(s): 00069f3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ import subprocess
4
+ import os
5
+ import tempfile
6
+
7
+
8
def _find_file_url(dataset, file_name):
    """Return the first string field whose basename equals *file_name*.

    Every split of *dataset* is walked in order. Within each example the
    ``"file"`` field is checked first, then every other string-valued field.
    Returning on the first hit stops the (streaming) iteration immediately.

    Args:
        dataset: Mapping of split name to an iterable of example dicts.
        file_name (str): Bare file name to look for.

    Returns:
        str | None: The matching field value, or None when nothing matches.
    """
    for split in dataset.values():
        for example in split:
            if not isinstance(example, dict):
                continue
            # Prefer the conventional "file" column, then fall back to any
            # other string-valued column of the example.
            candidates = [example.get("file"), *example.values()]
            for value in candidates:
                if isinstance(value, str) and os.path.basename(value) == file_name:
                    return value
    return None


def _remove_quietly(path):
    """Delete *path* on a best-effort basis."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup only: a file that is already gone (or cannot be removed)
        # must not hide the error that triggered the cleanup.
        pass


def convert_ts_to_mp4(dataset_name, file_name):
    """
    Download a .ts video referenced by a Hugging Face dataset and convert
    it to .mp4 with ffmpeg.

    Args:
        dataset_name (str): The name of the Hugging Face dataset.
        file_name (str): The name of the .ts file within the dataset.
            It should be just the filename, not the full path.

    Returns:
        str: The path to the converted .mp4 file on success. On failure a
            human-readable error message is returned instead (no exception
            propagates to the caller).
    """
    import urllib.request

    # NOTE(review): error messages are returned as plain strings even though
    # the Gradio output component is a gr.File; consider raising gr.Error so
    # the UI shows the message instead of treating it as a file path.
    try:
        # An empty name would match any field that is empty or ends in "/".
        if not file_name:
            return "Error: File not found in the dataset."

        # 1. Load the dataset lazily so nothing is downloaded up front.
        dataset = load_dataset(dataset_name, streaming=True)

        # 2. Locate the field that references the requested file.
        file_url = _find_file_url(dataset, file_name)
        if not file_url:
            return "Error: File not found in the dataset."

        # 3. Download into a private scratch directory. Using a plain path
        #    (rather than an open NamedTemporaryFile) avoids writing to a
        #    file that this process still holds open.
        with tempfile.TemporaryDirectory() as work_dir:
            ts_path = os.path.join(work_dir, "input.ts")
            try:
                # NOTE(review): file_url comes from dataset content and
                # urllib also accepts non-HTTP schemes such as file://;
                # restrict the scheme if the datasets are not trusted.
                urllib.request.urlretrieve(file_url, ts_path)
            except Exception as e:
                return f"Error downloading file: {e}"

            # 4. Reserve the output path; it must outlive this function so
            #    Gradio can serve it. Close our handle before ffmpeg writes.
            fd, mp4_path = tempfile.mkstemp(suffix=".mp4")
            os.close(fd)

            converted = False
            try:
                subprocess.run(
                    [
                        "ffmpeg",
                        "-i",
                        ts_path,
                        "-c:v",
                        "libx264",  # H.264 video
                        "-c:a",
                        "aac",  # AAC audio
                        "-y",  # The reserved output file already exists
                        mp4_path,
                    ],
                    check=True,  # Raise an exception on non-zero exit code
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                converted = True
            except subprocess.CalledProcessError as e:
                # ffmpeg output is not guaranteed to be valid UTF-8.
                details = e.stderr.decode("utf-8", errors="replace")
                error_message = f"FFmpeg conversion failed: {details}"
                print(error_message)  # Print to console for debugging in Spaces
                return error_message
            finally:
                # Do not leak the reserved output file on any failure
                # (ffmpeg error, ffmpeg missing, interruption, ...).
                if not converted:
                    _remove_quietly(mp4_path)

        # 5. Return the path to the .mp4 file
        return mp4_path

    except Exception as e:
        return f"An error occurred: {e}"
90
+
91
+
92
def gradio_interface():
    """
    Build the Gradio interface for the application.

    Returns:
        gr.Interface: An interface with two text inputs (dataset name and
            .ts file name) wired to ``convert_ts_to_mp4`` and a file output
            for the converted video.
    """
    inputs = [
        gr.Textbox(
            label="Hugging Face Dataset Name",
            placeholder="e.g., 'username/video-dataset'",
        ),
        gr.Textbox(
            label="TS File Name (within the dataset)",
            placeholder="e.g., 'file_name.ts'",
        ),
    ]
    outputs = gr.File(label="Converted MP4 File")  # gr.File gives a download link

    title = "TS to MP4 Converter"
    description = (
        "Convert .ts video files from Hugging Face datasets to .mp4 format. "
        "Provide the dataset name and the name of the .ts file. The converted "
        ".mp4 file will be available for download."
    )

    # Built from unindented pieces: the article is rendered as Markdown, where
    # leading indentation would turn the text into a code block.
    article = (
        "Example Usage:\n"
        "\n"
        "1. Enter the dataset's repository ID (for example "
        "'username/video-dataset') in the \"Hugging Face Dataset Name\" field "
        "and the bare file name (for example 'clip_0001.ts', without any "
        "directory prefix) in the \"TS File Name\" field. The dataset must "
        "actually contain .ts files.\n"
        "2. Click the 'Submit' button.\n"
        "3. The file will be converted, and a download link for the .mp4 "
        "file will be provided.\n"
    )

    return gr.Interface(
        fn=convert_ts_to_mp4,
        inputs=inputs,
        outputs=outputs,
        title=title,
        description=description,
        article=article,
    )
134
+
135
+
136
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    demo = gradio_interface()
    demo.launch()