MegaTronX committed on
Commit
be74542
·
verified ·
1 Parent(s): c2e8a2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -13
app.py CHANGED
@@ -4,24 +4,28 @@ import subprocess
4
  import os
5
  import tempfile
6
 
7
-
8
- def convert_ts_to_mp4(dataset_name, file_name):
9
  """
10
  Downloads a .ts video file from a Hugging Face dataset,
11
  converts it to .mp4 using ffmpeg, and returns the path
12
- to the .mp4 file.
13
 
14
  Args:
15
  dataset_name (str): The name of the Hugging Face dataset.
16
  file_name (str): The name of the .ts file within the dataset.
17
  It should be just the filename, not the full path.
 
 
18
 
19
  Returns:
20
  str: The path to the converted .mp4 file, or None on error.
21
  """
22
  try:
23
  # 1. Load the dataset
24
- dataset = load_dataset(dataset_name, streaming=True)
 
 
 
25
 
26
  # 2. Find the file. This part assumes the filename is unique
27
  # within the dataset. For more complex datasets, you might
@@ -35,7 +39,7 @@ def convert_ts_to_mp4(dataset_name, file_name):
35
  if "file" in example and os.path.basename(example["file"]) == file_name:
36
  file_url = example["file"]
37
  break
38
- elif isinstance(example, dict): # Check for nested file paths.
39
  for key, value in example.items():
40
  if isinstance(value, str) and os.path.basename(value) == file_name:
41
  file_url = value;
@@ -68,7 +72,7 @@ def convert_ts_to_mp4(dataset_name, file_name):
68
  "-c:v",
69
  "libx264", # Use libx264 for H.264 encoding (common)
70
  "-c:a",
71
- "aac", # Use AAC for audio encoding (common)
72
  "-y", # Overwrite output file if it exists
73
  mp4_file.name,
74
  ],
@@ -89,6 +93,7 @@ def convert_ts_to_mp4(dataset_name, file_name):
89
  return f"An error occurred: {e}"
90
 
91
 
 
92
  def gradio_interface():
93
  """
94
  Defines the Gradio interface for the application.
@@ -96,12 +101,17 @@ def gradio_interface():
96
  inputs = [
97
  gr.Textbox(
98
  label="Hugging Face Dataset Name",
99
- placeholder="e.g., 'PolyAI/minds-14'",
100
  ),
101
  gr.Textbox(
102
  label="TS File Name (within the dataset)",
103
  placeholder="e.g., 'file_name.ts'",
104
  ),
 
 
 
 
 
105
  ]
106
  outputs = gr.File(label="Converted MP4 File") # Use gr.File for downloadable files
107
 
@@ -109,18 +119,22 @@ def gradio_interface():
109
  description = (
110
  "Convert .ts video files from Hugging Face datasets to .mp4 format. "
111
  "Provide the dataset name and the name of the .ts file. The converted "
112
- ".mp4 file will be available for download."
 
113
  )
114
 
115
  # Example Usage (Corrected)
116
  article = """
117
  Example Usage:
118
 
119
- 1. For the 'PolyAI/minds-14' dataset and the file 'audio/en/common_voice_en_7722.ts',
120
- enter 'PolyAI/minds-14' in the \"Hugging Face Dataset Name\" field and
121
- 'common_voice_en_7722.ts' in the \"TS File Name\" field (note: the example dataset in the original prompt did not contain .ts files, so I've provided a placeholder. You'll need to adapt this to a dataset that actually *does* have .ts files).
122
- 2. Click the 'Submit' button.
123
- 3. The converted .mp4 file will be processed, and a download link will be provided.
 
 
 
124
  """
125
 
126
  return gr.Interface(
@@ -133,5 +147,6 @@ def gradio_interface():
133
  )
134
 
135
 
 
136
  if __name__ == "__main__":
137
  gradio_interface().launch()
 
4
  import os
5
  import tempfile
6
 
7
+ def convert_ts_to_mp4(dataset_name, file_name, hf_token):
 
8
  """
9
  Downloads a .ts video file from a Hugging Face dataset,
10
  converts it to .mp4 using ffmpeg, and returns the path
11
+ to the .mp4 file. Handles both public and private datasets.
12
 
13
  Args:
14
  dataset_name (str): The name of the Hugging Face dataset.
15
  file_name (str): The name of the .ts file within the dataset.
16
  It should be just the filename, not the full path.
17
+ hf_token (str): The Hugging Face token. If None or empty,
18
+ it's assumed the dataset is public.
19
 
20
  Returns:
21
  str: The path to the converted .mp4 file, or None on error.
22
  """
23
  try:
24
  # 1. Load the dataset
25
+ if hf_token:
26
+ dataset = load_dataset(dataset_name, use_auth_token=hf_token, streaming=True)
27
+ else:
28
+ dataset = load_dataset(dataset_name, streaming=True)
29
 
30
  # 2. Find the file. This part assumes the filename is unique
31
  # within the dataset. For more complex datasets, you might
 
39
  if "file" in example and os.path.basename(example["file"]) == file_name:
40
  file_url = example["file"]
41
  break
42
+ elif isinstance(example, dict): # Check for nested file paths.
43
  for key, value in example.items():
44
  if isinstance(value, str) and os.path.basename(value) == file_name:
45
  file_url = value;
 
72
  "-c:v",
73
  "libx264", # Use libx264 for H.264 encoding (common)
74
  "-c:a",
75
+ "aac", # Use AAC for audio encoding (common)
76
  "-y", # Overwrite output file if it exists
77
  mp4_file.name,
78
  ],
 
93
  return f"An error occurred: {e}"
94
 
95
 
96
+
97
  def gradio_interface():
98
  """
99
  Defines the Gradio interface for the application.
 
101
  inputs = [
102
  gr.Textbox(
103
  label="Hugging Face Dataset Name",
104
+ placeholder="e.g., 'PolyAI/minds-14' or 'my-org/my-private-dataset'",
105
  ),
106
  gr.Textbox(
107
  label="TS File Name (within the dataset)",
108
  placeholder="e.g., 'file_name.ts'",
109
  ),
110
+ gr.Textbox(
111
+ label="Hugging Face Token (for private datasets)",
112
+ placeholder="(Optional) Enter your Hugging Face token here, or set it as HF_TOKEN in Space settings",
113
+ type="password",
114
+ ),
115
  ]
116
  outputs = gr.File(label="Converted MP4 File") # Use gr.File for downloadable files
117
 
 
119
  description = (
120
  "Convert .ts video files from Hugging Face datasets to .mp4 format. "
121
  "Provide the dataset name and the name of the .ts file. The converted "
122
+ ".mp4 file will be available for download. "
123
+ "For private datasets, you *must* provide a Hugging Face token, either directly in the input box, or, preferably, by setting the `HF_TOKEN` secret in your Space's settings."
124
  )
125
 
126
  # Example Usage (Corrected)
127
  article = """
128
  Example Usage:
129
 
130
+ 1. For a public dataset like 'PolyAI/minds-14' and the file 'audio/en/common_voice_en_7722.ts',
131
+ enter 'PolyAI/minds-14' in the "Hugging Face Dataset Name" field and
132
+ 'common_voice_en_7722.ts' in the "TS File Name" field. Leave the "Hugging Face Token" field empty.
133
+ 2. For a private dataset, enter the dataset name (e.g., 'my-org/my-private-dataset')
134
+ and the .ts file name. Enter your Hugging Face token in the "Hugging Face Token" field
135
+ *or*, preferably, add your token as a secret named `HF_TOKEN` in your Space's settings.
136
+ 3. Click the 'Submit' button.
137
+ 4. The converted .mp4 file will be processed, and a download link will be provided.
138
  """
139
 
140
  return gr.Interface(
 
147
  )
148
 
149
 
150
+
151
  if __name__ == "__main__":
152
  gradio_interface().launch()