Spaces:
Sleeping
Sleeping
ender
committed on
Commit
·
a92043d
1
Parent(s):
52d4758
HF Ready
Browse files- README.md +5 -5
- app.py +70 -0
- spatialmedia/metadata_utils.py +667 -0
- spatialmedia/mpeg/__init__.py +31 -0
- spatialmedia/mpeg/box.py +201 -0
- spatialmedia/mpeg/constants.py +86 -0
- spatialmedia/mpeg/container.py +210 -0
- spatialmedia/mpeg/mpeg4_container.py +133 -0
- spatialmedia/mpeg/sa3d.py +176 -0
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
1 |
---
|
2 |
+
title: 360metadata
|
3 |
+
emoji: π
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.29.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from spatialmedia import metadata_utils
|
3 |
+
import os
|
4 |
+
import shutil
|
5 |
+
|
6 |
+
def console_print(message):
    """Console callback handed to metadata_utils: echo `message` to stdout."""
    print(message)
|
8 |
+
|
9 |
+
def inject_360_metadata(input_video, stereo_mode, spatial_audio, crop):
    """Write the uploaded video to disk and inject 360 metadata into a copy.

    Args:
        input_video: Bytes of the uploaded video, or None if nothing was
            uploaded.
        stereo_mode: Stereo layout passed to generate_spherical_xml
            ("none", "top-bottom" or "left-right" in the UI).
        spatial_audio: Truthy to also derive and inject spatial audio
            metadata from the file's audio channel count.
        crop: Optional crop string "w:h:f_w:f_h:x:y"; empty for no crop.

    Returns:
        Path of the injected output video, or None when input_video is None.

    Raises:
        ValueError: If the spherical XML cannot be generated, the input
            cannot be parsed for audio, the audio layout is not a supported
            spatial format, or no output file was produced.
    """
    if input_video is None:
        return None

    base_dir = "processed_videos"
    os.makedirs(base_dir, exist_ok=True)
    input_video_path = os.path.join(base_dir, "input_video.mp4")
    output_video_path = os.path.join(base_dir, "output_video.mp4")

    with open(input_video_path, "wb") as f:
        f.write(input_video)

    # Remove the result of an earlier request: inject_metadata reports
    # failures only through the console callback, so a leftover file would
    # otherwise be returned as if it were the new result.
    if os.path.exists(output_video_path):
        os.remove(output_video_path)

    metadata = metadata_utils.Metadata()
    metadata.video = metadata_utils.generate_spherical_xml(stereo_mode, crop)
    if not metadata.video:
        # generate_spherical_xml returns False for invalid crop parameters.
        raise ValueError("Failed to generate metadata.")

    if spatial_audio:
        parsed_metadata = metadata_utils.parse_metadata(
            input_video_path, console_print)
        if parsed_metadata is None:
            # parse_metadata returns None when the file cannot be loaded.
            raise ValueError("Failed to parse the input video.")
        spatial_audio_description = (
            metadata_utils.get_spatial_audio_description(
                parsed_metadata.num_audio_channels))
        if not spatial_audio_description.is_supported:
            raise ValueError(
                "Audio has %d channel(s) and isn't a supported spatial audio "
                "format." % (parsed_metadata.num_audio_channels))
        metadata.audio = metadata_utils.get_spatial_audio_metadata(
            spatial_audio_description.order,
            spatial_audio_description.has_head_locked_stereo)

    metadata_utils.inject_metadata(
        input_video_path, output_video_path, metadata, console_print)
    if not os.path.exists(output_video_path):
        raise ValueError("Failed to inject metadata.")
    return output_video_path
|
41 |
+
|
42 |
+
def update_output(output_file_path):
    """Return `output_file_path` if it names an existing path, else None."""
    if not output_file_path:
        return None
    if not os.path.exists(output_file_path):
        return None
    return output_file_path
|
47 |
+
|
48 |
+
def main():
    """Build the Gradio interface and launch it."""

    def run_injection(input_video, stereo_mode, spatial_audio, crop):
        # `postprocess` on an event listener is a boolean flag, not a hook,
        # so the output-path check is applied here instead.
        return update_output(
            inject_360_metadata(input_video, stereo_mode, spatial_audio, crop))

    with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
        gr.Markdown("This space adds appropriate metadata to 360° equirectangular videos so they can be recognized as such. The audio checkbox option is only viable if your video has spatial audio in ambiX ACN/SN3D with head-locked stereo")
        # NOTE(review): the original nesting under gr.Row() was lost in
        # extraction; all input widgets are assumed to share the row.
        with gr.Row():
            video_input = gr.File(label="Select video file", type="binary")
            stereo_dropdown = gr.Dropdown(choices=["none", "top-bottom", "left-right"], label="Stereo Mode")
            spatial_audio_checkbox = gr.Checkbox(label="Spatial Audio")
            crop_input = gr.Textbox(label="Crop Region (w:h:f_w:f_h:x:y)- Optional")
            submit_btn = gr.Button("Inject 360° Metadata")

        output_file = gr.File(label="Download Injected Video", type="filepath", visible=True)

        submit_btn.click(
            fn=run_injection,
            inputs=[video_input, stereo_dropdown, spatial_audio_checkbox, crop_input],
            outputs=output_file,
        )

    demo.launch(share=True)


if __name__ == "__main__":
    main()
|
spatialmedia/metadata_utils.py
ADDED
@@ -0,0 +1,667 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
"""Utilities for examining/injecting spatial media metadata in MP4/MOV files."""
|
19 |
+
|
20 |
+
import collections
|
21 |
+
import os
|
22 |
+
import re
|
23 |
+
import struct
|
24 |
+
import traceback
|
25 |
+
import xml.etree
|
26 |
+
import xml.etree.ElementTree
|
27 |
+
|
28 |
+
from spatialmedia import mpeg
|
29 |
+
|
30 |
+
# File extensions routed to the MPEG-4 parser/injector.
MPEG_FILE_EXTENSIONS = [".mp4", ".mov"]

# 16-byte identifier that prefixes the payload of the spherical uuid box.
SPHERICAL_UUID_ID = bytes.fromhex("ffcc8263f8554a938814587a02521fdd")

# XML contents.
RDF_PREFIX = ' xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" '

SPHERICAL_XML_HEADER = (
    '<?xml version="1.0"?>'
    '<rdf:SphericalVideo\n'
    'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n'
    'xmlns:GSpherical="http://ns.google.com/videos/1.0/spherical/">'
)

SPHERICAL_XML_CONTENTS = (
    "<GSpherical:Spherical>true</GSpherical:Spherical>"
    "<GSpherical:Stitched>true</GSpherical:Stitched>"
    "<GSpherical:StitchingSoftware>"
    "Spherical Metadata Tool"
    "</GSpherical:StitchingSoftware>"
    "<GSpherical:ProjectionType>equirectangular</GSpherical:ProjectionType>"
)

SPHERICAL_XML_CONTENTS_TOP_BOTTOM = (
    "<GSpherical:StereoMode>top-bottom</GSpherical:StereoMode>"
)
SPHERICAL_XML_CONTENTS_LEFT_RIGHT = (
    "<GSpherical:StereoMode>left-right</GSpherical:StereoMode>"
)

# Parameter order matches that of the crop option.
SPHERICAL_XML_CONTENTS_CROP_FORMAT = (
    "<GSpherical:CroppedAreaImageWidthPixels>{0}"
    "</GSpherical:CroppedAreaImageWidthPixels>"
    "<GSpherical:CroppedAreaImageHeightPixels>{1}"
    "</GSpherical:CroppedAreaImageHeightPixels>"
    "<GSpherical:FullPanoWidthPixels>{2}</GSpherical:FullPanoWidthPixels>"
    "<GSpherical:FullPanoHeightPixels>{3}</GSpherical:FullPanoHeightPixels>"
    "<GSpherical:CroppedAreaLeftPixels>{4}</GSpherical:CroppedAreaLeftPixels>"
    "<GSpherical:CroppedAreaTopPixels>{5}</GSpherical:CroppedAreaTopPixels>"
)

SPHERICAL_XML_FOOTER = "</rdf:SphericalVideo>"

# Tag names (without namespace) that parse_spherical_xml reports as known.
SPHERICAL_TAGS_LIST = [
    "Spherical",
    "Stitched",
    "StitchingSoftware",
    "ProjectionType",
    "SourceCount",
    "StereoMode",
    "InitialViewHeadingDegrees",
    "InitialViewPitchDegrees",
    "InitialViewRollDegrees",
    "Timestamp",
    "CroppedAreaImageWidthPixels",
    "CroppedAreaImageHeightPixels",
    "FullPanoWidthPixels",
    "FullPanoHeightPixels",
    "CroppedAreaLeftPixels",
    "CroppedAreaTopPixels",
]
|
88 |
+
|
89 |
+
class Metadata(object):
    """Metadata to inject: a video part and an audio part, both unset."""

    def __init__(self):
        # Both parts start empty; callers assign them before injection.
        self.video, self.audio = None, None
|
93 |
+
|
94 |
+
class ParsedMetadata(object):
    """Metadata read back from a file by parse_spherical_mpeg4."""

    def __init__(self):
        # Per-track spherical metadata, keyed by track name.
        self.video = {}
        # SA3D box found in the file, if any.
        self.audio = None
        # Channel count of the audio sample description; 0 until one is seen.
        self.num_audio_channels = 0
|
99 |
+
|
100 |
+
# "{namespace}" prefix carried by GSpherical element tags after XML parsing.
SPHERICAL_PREFIX = "{http://ns.google.com/videos/1.0/spherical/}"
# Maps the namespace-qualified tag to its bare name. A comprehension is used
# so no loop variable is left behind at module level.
SPHERICAL_TAGS = {SPHERICAL_PREFIX + tag: tag for tag in SPHERICAL_TAGS_LIST}

# Raw string: "\d" in a normal string literal is an invalid escape sequence.
integer_regex_group = r"(\d+)"
# Six colon-separated integers: w:h:f_w:f_h:x:y.
crop_regex = "^{0}$".format(":".join([integer_regex_group] * 6))

# Highest ambisonic order get_spatial_audio_description will recognise.
MAX_SUPPORTED_AMBIX_ORDER = 1

SpatialAudioDescription = collections.namedtuple(
    'SpatialAudioDescription',
    'order is_supported has_head_locked_stereo')
|
113 |
+
|
114 |
+
def get_spatial_audio_description(num_channels):
    """Maps an audio channel count to a SpatialAudioDescription.

    Args:
      num_channels: int, number of channels in the audio track.

    Returns:
      SpatialAudioDescription whose is_supported is True when num_channels
      equals (order + 1)^2, optionally plus 2 head-locked stereo channels,
      for an order up to MAX_SUPPORTED_AMBIX_ORDER; otherwise a description
      with order -1 and is_supported False.
    """
    for order in range(1, MAX_SUPPORTED_AMBIX_ORDER + 1):
        ambisonic_channels = (order + 1) * (order + 1)
        if ambisonic_channels == num_channels:
            head_locked = False
        elif (ambisonic_channels + 2) == num_channels:
            head_locked = True
        else:
            continue
        return SpatialAudioDescription(
            order=order,
            is_supported=True,
            has_head_locked_stereo=head_locked)

    return SpatialAudioDescription(
        order=-1, is_supported=False, has_head_locked_stereo=True)
|
125 |
+
|
126 |
+
def spherical_uuid(metadata):
    """Builds a uuid box whose payload is the spherical id plus the xml.

    Args:
      metadata: String, xml to inject in spherical tag.

    Returns:
      A uuid box containing the UTF-8 encoded spherical metadata.
    """
    box = mpeg.Box()
    assert(len(SPHERICAL_UUID_ID) == 16)
    box.name = mpeg.constants.TAG_UUID
    box.header_size = 8
    box.content_size = 0

    payload = SPHERICAL_UUID_ID + metadata.encode("utf-8")
    box.contents = payload
    box.content_size = len(box.contents)

    return box
|
145 |
+
|
146 |
+
|
147 |
+
def mpeg4_add_spherical(mpeg4_file, in_fh, metadata):
    """Adds a spherical uuid box to every video track of an mpeg4 file.

    Existing uuid boxes are removed from each track before it is examined.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
      in_fh: file handle, Source for uncached file contents.
      metadata: string, xml metadata to inject into spherical tag.

    Returns:
      False if a uuid box could not be added to a video track, else True.
    """
    for trak in mpeg4_file.moov_box.contents:
        if trak.name != mpeg.constants.TAG_TRAK:
            continue

        trak.remove(mpeg.constants.TAG_UUID)
        for mdia in trak.contents:
            if mdia.name != mpeg.constants.TAG_MDIA:
                continue

            is_video = False
            for hdlr in mdia.contents:
                if hdlr.name != mpeg.constants.TAG_HDLR:
                    continue
                # The 4 bytes at content offset 8 are compared with the
                # video track type constant.
                in_fh.seek(hdlr.content_start() + 8)
                if in_fh.read(4) == mpeg.constants.TRAK_TYPE_VIDE:
                    is_video = True
                    break

            if is_video:
                if not trak.add(spherical_uuid(metadata)):
                    return False
                # Done with this track; move on to the next one.
                break

    mpeg4_file.resize()
    return True
|
178 |
+
|
179 |
+
def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console):
    """Adds spatial audio metadata to the first audio track found.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
      in_fh: file handle, Source for uncached file contents.
      audio_metadata: dictionary ('ambisonic_type': string,
        'ambisonic_order': int, 'head_locked_stereo': Bool),
        Supports 'periphonic' ambisonic type only.
      console: callable taking one message string.

    Returns:
      The result of inject_spatial_audio_atom for the first sound track,
      or True when the file has no sound track.
    """
    for trak in mpeg4_file.moov_box.contents:
        if trak.name != mpeg.constants.TAG_TRAK:
            continue
        for mdia in trak.contents:
            if mdia.name != mpeg.constants.TAG_MDIA:
                continue
            for hdlr in mdia.contents:
                if hdlr.name != mpeg.constants.TAG_HDLR:
                    continue
                in_fh.seek(hdlr.content_start() + 8)
                handler_type = in_fh.read(4)
                if handler_type == mpeg.constants.TAG_SOUN:
                    return inject_spatial_audio_atom(
                        in_fh, mdia, audio_metadata, console)
    return True
|
204 |
+
|
205 |
+
def mpeg4_add_audio_metadata(mpeg4_file, in_fh, audio_metadata, console):
    """Injects spatial audio metadata unless the file has several audio tracks.

    Returns:
      False (after reporting through console) when more than one audio
      track is present; otherwise the result of mpeg4_add_spatial_audio.
    """
    track_count = get_num_audio_tracks(mpeg4_file, in_fh)
    if track_count <= 1:
        return mpeg4_add_spatial_audio(
            mpeg4_file, in_fh, audio_metadata, console)

    console("Error: Expected 1 audio track. Found %d" % track_count)
    return False
|
212 |
+
|
213 |
+
def inject_spatial_audio_atom(
        in_fh, audio_media_atom, audio_metadata, console):
    """Appends an SA3D box to each sound sample description of a media atom.

    Walks minf -> stbl -> stsd and, for every sound sample description,
    checks that the channel count read from the file matches the count
    expected for the requested ambisonic layout before appending the box.

    Args:
      in_fh: file handle, Source for uncached file contents.
      audio_media_atom: mdia box of the audio track.
      audio_metadata: dictionary with 'ambisonic_type', 'ambisonic_order'
        and 'head_locked_stereo' entries.
      console: callable taking one message string.

    Returns:
      False on a channel count mismatch (reported through console),
      True otherwise.
    """
    for minf in audio_media_atom.contents:
        if minf.name != mpeg.constants.TAG_MINF:
            continue
        for stbl in minf.contents:
            if stbl.name != mpeg.constants.TAG_STBL:
                continue
            for stsd in stbl.contents:
                if stsd.name != mpeg.constants.TAG_STSD:
                    continue
                for entry in stsd.contents:
                    if entry.name not in \
                            mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS:
                        continue

                    in_fh.seek(entry.position + entry.header_size + 16)
                    found_channels = get_num_audio_channels(stsd, in_fh)
                    wanted_channels = get_expected_num_audio_channels(
                        audio_metadata["ambisonic_type"],
                        audio_metadata["ambisonic_order"],
                        audio_metadata["head_locked_stereo"])

                    if found_channels != wanted_channels:
                        if audio_metadata["head_locked_stereo"]:
                            stereo_note = " with head-locked stereo"
                        else:
                            stereo_note = ""
                        console(
                            "Error: Found %d audio channel(s). "
                            "Expected %d channel(s) for %s ambisonics "
                            "of order %d%s."
                            % (found_channels,
                               wanted_channels,
                               audio_metadata["ambisonic_type"],
                               audio_metadata["ambisonic_order"],
                               stereo_note))
                        return False

                    entry.contents.append(
                        mpeg.SA3DBox.create(found_channels, audio_metadata))
    return True
|
253 |
+
|
254 |
+
def parse_spherical_xml(contents, console):
    """Returns spherical metadata for a set of xml data.

    If the first parse fails, the rdf namespace declaration is inserted
    after "<rdf:SphericalVideo" and the parse is retried once.

    Args:
      contents: string, spherical metadata xml contents.
      console: callable taking one message string.

    Returns:
      Dictionary mapping known spherical tag names to their text, or None
      if the xml cannot be parsed.
    """
    try:
        parsed_xml = xml.etree.ElementTree.XML(contents)
    except xml.etree.ElementTree.ParseError:
        console(traceback.format_exc())
        console(contents)

        marker = "<rdf:SphericalVideo"
        index = contents.find(marker)
        if index != -1:
            index += len(marker)
            contents = contents[:index] + RDF_PREFIX + contents[index:]
        try:
            parsed_xml = xml.etree.ElementTree.XML(contents)
        except xml.etree.ElementTree.ParseError:
            console("\t\tParser Error on XML")
            console(traceback.format_exc())
            console(contents)
            return None
        # console takes a single message, so build one string.
        console("\t\tWarning missing rdf prefix:" + RDF_PREFIX)

    spherical_dictionary = dict()
    for child in parsed_xml:
        # An empty element has text None, which cannot be concatenated.
        shown_text = "" if child.text is None else child.text
        if child.tag in SPHERICAL_TAGS:
            name = SPHERICAL_TAGS[child.tag]
            console("\t\t" + name + " = " + shown_text)
            spherical_dictionary[name] = child.text
        else:
            tag = child.tag
            if tag.startswith(SPHERICAL_PREFIX):
                tag = tag[len(SPHERICAL_PREFIX):]
            console("\t\tUnknown: " + tag + " = " + shown_text)

    return spherical_dictionary
|
294 |
+
|
295 |
+
|
296 |
+
def parse_spherical_mpeg4(mpeg4_file, fh, console):
    """Collects spherical and spatial audio metadata from a loaded mpeg4 file.

    Args:
      mpeg4_file: mpeg4, loaded mpeg4 file contents.
      fh: file handle, file handle for uncached file contents.
      console: callable taking one message string.

    Returns:
      ParsedMetadata whose video dict maps track names to parsed spherical
      metadata, with audio and num_audio_channels filled in when found.
    """
    def children(box, tag):
        # Lazily yield the direct children of `box` named `tag`.
        return (child for child in box.contents if child.name == tag)

    parsed = ParsedMetadata()
    track_index = 0
    for trak in children(mpeg4_file.moov_box, mpeg.constants.TAG_TRAK):
        trackName = "Track %d" % track_index
        console("\t%s" % trackName)
        track_index += 1

        for child in trak.contents:
            if child.name == mpeg.constants.TAG_UUID:
                # Use the cached contents when present, else read the file.
                cached = child.contents
                if cached:
                    box_id = cached[:16]
                else:
                    fh.seek(child.content_start())
                    box_id = fh.read(16)

                if box_id == SPHERICAL_UUID_ID:
                    if cached:
                        xml_bytes = cached[16:]
                    else:
                        xml_bytes = fh.read(child.content_size - 16)
                    parsed.video[trackName] = parse_spherical_xml(
                        xml_bytes.decode("utf-8"), console)

            if child.name == mpeg.constants.TAG_MDIA:
                for minf in children(child, mpeg.constants.TAG_MINF):
                    for stbl in children(minf, mpeg.constants.TAG_STBL):
                        for stsd in children(stbl, mpeg.constants.TAG_STSD):
                            for entry in stsd.contents:
                                if entry.name not in \
                                        mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS:
                                    continue
                                parsed.num_audio_channels = \
                                    get_num_audio_channels(stsd, fh)
                                for sa3d in children(
                                        entry, mpeg.constants.TAG_SA3D):
                                    sa3d.print_box(console)
                                    parsed.audio = sa3d
    return parsed
|
350 |
+
|
351 |
+
def parse_mpeg4(input_file, console):
    """Loads an mpeg4 file and returns its parsed spherical metadata.

    Args:
      input_file: string, path of the file to read.
      console: callable taking one message string.

    Returns:
      The result of parse_spherical_mpeg4, or None if the file cannot be
      opened or loaded (the reason is reported through console).
    """
    try:
        in_fh = open(input_file, "rb")
    except OSError:
        # Previously this message sat after the `with` block, where it
        # could never run.
        console("Error \"" + input_file + "\" does not exist or do not have "
                "permission.")
        return None

    with in_fh:
        mpeg4_file = mpeg.load(in_fh)
        if mpeg4_file is None:
            console("Error, file could not be opened.")
            return None

        console("Loaded file...")
        return parse_spherical_mpeg4(mpeg4_file, in_fh, console)
|
363 |
+
|
364 |
+
|
365 |
+
def inject_mpeg4(input_file, output_file, metadata, console):
    """Injects metadata into an mpeg4 file and saves the result.

    Args:
      input_file: string, path of the source file.
      output_file: string, path the injected file is written to.
      metadata: object with `video` (xml string) and `audio` (dictionary
        or None) attributes.
      console: callable taking one message string.

    Returns:
      None. Failures are reported through console. Nothing is written when
      the input cannot be opened or loaded.
    """
    try:
        in_fh = open(input_file, "rb")
    except OSError:
        # Previously this message sat after the `with` block, where it
        # could never run.
        console("Error file: \"" + input_file + "\" does not exist or do "
                "not have permission.")
        return

    with in_fh:
        mpeg4_file = mpeg.load(in_fh)
        if mpeg4_file is None:
            console("Error file could not be opened.")
            # Nothing to inject into; continuing would dereference None.
            return

        if not mpeg4_add_spherical(mpeg4_file, in_fh, metadata.video):
            console("Error failed to insert spherical data")

        if metadata.audio:
            if not mpeg4_add_audio_metadata(
                    mpeg4_file, in_fh, metadata.audio, console):
                console("Error failed to insert spatial audio data")

        console("Saved file settings")
        parse_spherical_mpeg4(mpeg4_file, in_fh, console)

        with open(output_file, "wb") as out_fh:
            mpeg4_file.save(in_fh, out_fh)
|
389 |
+
|
390 |
+
def parse_metadata(src, console):
    """Parses the spherical/spatial metadata of a media file.

    Args:
      src: string, path of the file to examine.
      console: callable taking one message string.

    Returns:
      The result of parse_mpeg4 for .mp4/.mov files; None when the file
      cannot be opened or has an unknown extension.
    """
    infile = os.path.abspath(src)

    try:
        with open(infile, "rb"):
            pass
    except OSError:
        console("Error: " + infile +
                " does not exist or we do not have permission")
        # Stop here, as inject_metadata does, instead of processing a file
        # that cannot be read.
        return None

    console("Processing: " + infile)
    extension = os.path.splitext(infile)[1].lower()

    if extension in MPEG_FILE_EXTENSIONS:
        return parse_mpeg4(infile, console)

    console("Unknown file type")
    return None
|
408 |
+
|
409 |
+
|
410 |
+
def inject_metadata(src, dest, metadata, console):
    """Injects metadata into `src` and writes the result to `dest`.

    Args:
      src: string, path of the source file.
      dest: string, path of the file to write; must differ from src.
      metadata: metadata object handed to inject_mpeg4.
      console: callable taking one message string.

    Returns:
      The string "Input and output cannot be the same" when both paths
      resolve to the same file; None otherwise. Other failures are
      reported through console only.
    """
    infile = os.path.abspath(src)
    outfile = os.path.abspath(dest)

    if infile == outfile:
        message = "Input and output cannot be the same"
        # Also report it: callers that ignore the return value would
        # otherwise never see this failure.
        console("Error: " + message)
        return message

    try:
        with open(infile, "rb"):
            pass
    except OSError:
        console("Error: " + infile +
                " does not exist or we do not have permission")
        return

    console("Processing: " + infile)

    extension = os.path.splitext(infile)[1].lower()

    if extension in MPEG_FILE_EXTENSIONS:
        inject_mpeg4(infile, outfile, metadata, console)
        return

    console("Unknown file type")
|
434 |
+
|
435 |
+
|
436 |
+
def generate_spherical_xml(stereo=None, crop=None):
    """Builds the spherical xml string for the given stereo mode and crop.

    Args:
      stereo: "top-bottom" or "left-right" to add a StereoMode element;
        any other value adds none.
      crop: optional string "w:h:f_w:f_h:x:y" (cropped width/height, full
        pano width/height, left/top offset, all integers).

    Returns:
      The xml string, or False (after printing the reason) when the crop
      string is malformed or describes an invalid region.
    """
    # Configure inject xml.
    if stereo == "top-bottom":
        stereo_xml = SPHERICAL_XML_CONTENTS_TOP_BOTTOM
    elif stereo == "left-right":
        stereo_xml = SPHERICAL_XML_CONTENTS_LEFT_RIGHT
    else:
        stereo_xml = ""

    crop_xml = ""
    if crop:
        crop_match = re.match(crop_regex, crop)
        if not crop_match:
            print("Error: Invalid crop params: {crop}".format(crop=crop))
            return False

        crop_w = int(crop_match.group(1))
        crop_h = int(crop_match.group(2))
        full_w = int(crop_match.group(3))
        full_h = int(crop_match.group(4))
        left = int(crop_match.group(5))
        top = int(crop_match.group(6))

        # This should never happen based on the crop regex.
        if full_w <= 0 or full_h <= 0:
            print("Error with crop params: full pano dimensions are "
                  "invalid: width = {width} height = {height}".format(
                      width=full_w, height=full_h))
            return False

        crop_size_ok = (0 < crop_w <= full_w) and (0 < crop_h <= full_h)
        if not crop_size_ok:
            print("Error with crop params: cropped area dimensions are "
                  "invalid: width = {width} height = {height}".format(
                      width=crop_w, height=crop_h))
            return False

        # We are pretty restrictive and don't allow anything strange. There
        # could be use-cases for a horizontal offset that essentially
        # translates the domain, but we don't support this (so that no
        # extra work has to be done on the client).
        total_width = left + crop_w
        total_height = top + crop_h
        if (left < 0 or top < 0 or
                total_width > full_w or total_height > full_h):
            print("Error with crop params: cropped area offsets are "
                  "invalid: left = {left} top = {top} "
                  "left+cropped width: {total_width} "
                  "top+cropped height: {total_height}".format(
                      left=left,
                      top=top,
                      total_width=total_width,
                      total_height=total_height))
            return False

        crop_xml = SPHERICAL_XML_CONTENTS_CROP_FORMAT.format(
            crop_w, crop_h, full_w, full_h, left, top)

    return "".join([
        SPHERICAL_XML_HEADER,
        SPHERICAL_XML_CONTENTS,
        stereo_xml,
        crop_xml,
        SPHERICAL_XML_FOOTER,
    ])
|
506 |
+
|
507 |
+
|
508 |
+
def get_descriptor_length(in_fh):
    """Derives the length of the MP4 elementary stream descriptor at the
    current position in the input file.

    The length is stored in up to four bytes: the low 7 bits of each byte
    carry the value and the high bit signals that another byte follows.

    Args:
      in_fh: binary file handle positioned at the first length byte.

    Returns:
      The decoded length; in_fh is left just past the last length byte.
    """
    descriptor_length = 0
    for _ in range(4):
        size_byte = struct.unpack(">B", in_fh.read(1))[0]
        descriptor_length = (descriptor_length << 7) | (size_byte & 0x7F)
        # Stop when the continuation bit is clear. Comparing the whole byte
        # with 0x80 stopped early on bytes such as 0x81.
        if not size_byte & 0x80:
            break
    return descriptor_length
|
520 |
+
|
521 |
+
|
522 |
+
def get_expected_num_audio_channels(
        ambisonics_type, ambisonics_order, head_locked_stereo):
    """Returns the channel count expected for an ambisonic layout.

    For 'periphonic' this is (order + 1)^2, plus 2 when head_locked_stereo
    equals True; any other ambisonic type yields -1.
    """
    if ambisonics_type != 'periphonic':
        return -1

    ambisonic_channels = (ambisonics_order + 1) * (ambisonics_order + 1)
    if head_locked_stereo == True:
        return ambisonic_channels + 2
    return ambisonic_channels + 0
|
533 |
+
|
534 |
+
def get_num_audio_channels(stsd, in_fh):
    """Returns the channel count of the first sound entry in an stsd box.

    mp4a entries are read through get_aac_num_channels, other sound sample
    descriptions through get_sample_description_num_channels. Returns -1
    if `stsd` is not an stsd box or holds no sound sample description.
    """
    if stsd.name != mpeg.constants.TAG_STSD:
        print("get_num_audio_channels should be given a STSD box")
        return -1

    for entry in stsd.contents:
        if entry.name == mpeg.constants.TAG_MP4A:
            return get_aac_num_channels(entry, in_fh)
        if entry.name in mpeg.constants.SOUND_SAMPLE_DESCRIPTIONS:
            return get_sample_description_num_channels(entry, in_fh)

    return -1
|
544 |
+
|
545 |
+
def get_sample_description_num_channels(sample_description, in_fh):
    """Reads the number of audio channels from a sound sample description.

    Args:
      sample_description: box exposing content_start() and name.
      in_fh: seekable binary file handle for the file holding the box.

    Returns:
      The channel count, or -1 for an unsupported description version.
      The position of in_fh is restored before returning, including on
      the unsupported-version path.
    """
    p = in_fh.tell()
    try:
        # The version field sits 8 bytes into the box content.
        in_fh.seek(sample_description.content_start() + 8)
        version = struct.unpack(">h", in_fh.read(2))[0]
        # Skip revision level (int16) and vendor (int32).
        in_fh.seek(6, 1)

        if version == 0 or version == 1:
            # The channel count is the next int16 in both layouts.
            return struct.unpack(">h", in_fh.read(2))[0]
        if version == 2:
            # Skip four int16, two int32 and one float64 field (24 bytes)
            # that precede the int32 channel count.
            in_fh.seek(24, 1)
            return struct.unpack(">i", in_fh.read(4))[0]

        name = sample_description.name
        if isinstance(name, bytes):
            # Box names read from a binary file are bytes and cannot be
            # concatenated with str.
            name = name.decode("latin-1")
        print("Unsupported version for " + name + " box")
        return -1
    finally:
        in_fh.seek(p)
|
579 |
+
|
580 |
+
def get_aac_num_channels(box, in_fh):
    """Reads the number of audio channels from AAC's AudioSpecificConfig
    descriptor within the esds child box of the input mp4a or wave box.

    Args:
      box: mp4a or wave box whose children are searched.
      in_fh: seekable binary file handle for the file holding the box.

    Returns:
      The channel configuration value, or -1 if the box is of another
      type, holds no esds/wave child, or a descriptor cannot be read.
      The position of in_fh is restored before returning.
    """
    p = in_fh.tell()
    if box.name not in [mpeg.constants.TAG_MP4A, mpeg.constants.TAG_WAVE]:
        return -1

    # Stays -1 when no esds or wave child is found.
    channel_configuration = -1
    try:
        for element in box.contents:
            if element.name == mpeg.constants.TAG_WAVE:
                # Handle .mov with AAC audio, where the structure is:
                #     stsd -> mp4a -> wave -> esds
                channel_configuration = get_aac_num_channels(element, in_fh)
                break

            if element.name != mpeg.constants.TAG_ESDS:
                continue
            in_fh.seek(element.content_start() + 4)
            descriptor_tag = struct.unpack(">B", in_fh.read(1))[0]

            # Verify the read descriptor is an elementary stream descriptor
            if descriptor_tag != 3:  # Not an MP4 elementary stream.
                print("Error: failed to read elementary stream descriptor.")
                return -1
            get_descriptor_length(in_fh)
            in_fh.seek(3, 1)  # Seek to the decoder configuration descriptor
            config_descriptor_tag = struct.unpack(">B", in_fh.read(1))[0]

            # Verify the read descriptor is a decoder config. descriptor.
            if config_descriptor_tag != 4:
                print("Error: failed to read decoder config. descriptor.")
                return -1
            get_descriptor_length(in_fh)
            in_fh.seek(13, 1)  # offset to the decoder specific config descriptor.
            decoder_specific_descriptor_tag = struct.unpack(
                ">B", in_fh.read(1))[0]

            # Verify the read descriptor is a decoder specific info descriptor
            if decoder_specific_descriptor_tag != 5:
                print("Error: failed to read MP4 audio decoder specific config.")
                return -1
            audio_specific_descriptor_size = get_descriptor_length(in_fh)
            if audio_specific_descriptor_size < 2:
                # Explicit check: an assert would vanish under `python -O`.
                print("Error: failed to read MP4 audio decoder specific config.")
                return -1
            decoder_descriptor = struct.unpack(">h", in_fh.read(2))[0]
            sampling_frequency_index = (0x0780 & decoder_descriptor) >> 7
            if sampling_frequency_index == 0:
                # TODO: If the sample rate is 96kHz an additional 24 bit offset
                # value here specifies the actual sample rate.
                print("Error: Greater than 48khz audio is currently not supported.")
                return -1
            channel_configuration = (0x0078 & decoder_descriptor) >> 3
    finally:
        # Restore the caller's position on every exit path.
        in_fh.seek(p)
    return channel_configuration
|
633 |
+
|
634 |
+
|
635 |
+
def get_num_audio_tracks(mpeg4_file, in_fh):
    """Returns the number of audio tracks in the input mpeg4 file.

    Walks moov -> trak -> mdia -> hdlr and counts every hdlr box whose
    4 bytes located 8 bytes into its payload equal TAG_SOUN.

    Args:
      mpeg4_file: loaded mpeg4 structure exposing moov_box.contents.
      in_fh: file handle, source file the hdlr payloads are read from.
          Its position is moved by this function.

    Returns:
      Int, number of hdlr boxes identified as sound handlers.
    """
    audio_track_count = 0
    for trak in mpeg4_file.moov_box.contents:
        if trak.name != mpeg.constants.TAG_TRAK:
            continue
        for mdia in trak.contents:
            if mdia.name != mpeg.constants.TAG_MDIA:
                continue
            for hdlr in mdia.contents:
                if hdlr.name != mpeg.constants.TAG_HDLR:
                    continue
                # Presumably the handler-type field; it is the value that is
                # compared against the "soun" tag.
                in_fh.seek(hdlr.content_start() + 8)
                if in_fh.read(4) == mpeg.constants.TAG_SOUN:
                    audio_track_count += 1
    return audio_track_count
|
651 |
+
|
652 |
+
|
653 |
+
def get_spatial_audio_metadata(ambisonic_order, head_locked_stereo):
    """Builds the spatial audio metadata dictionary for a periphonic stream.

    Args:
      ambisonic_order: ambisonic order stored under "ambisonic_order" and
          used to derive the expected channel count.
      head_locked_stereo: bool, stored under "head_locked_stereo" and
          passed on to the channel-count computation.

    Returns:
      Dict with the keys ambisonic_order, head_locked_stereo,
      ambisonic_type ("periphonic"), ambisonic_channel_ordering ("ACN"),
      ambisonic_normalization ("SN3D") and channel_map. channel_map is the
      identity mapping [0, 1, ..., num_channels - 1].
    """
    num_channels = get_expected_num_audio_channels(
        "periphonic", ambisonic_order, head_locked_stereo)
    return {
        "ambisonic_order": ambisonic_order,
        "head_locked_stereo": head_locked_stereo,
        "ambisonic_type": "periphonic",
        "ambisonic_channel_ordering": "ACN",
        "ambisonic_normalization": "SN3D",
        # Materialised as a list: under Python 3 a bare range() is a lazy
        # object, not a list, and would not compare or print like one.
        "channel_map": list(range(num_channels)),
    }
|
spatialmedia/mpeg/__init__.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
import spatialmedia.mpeg.sa3d
|
19 |
+
import spatialmedia.mpeg.box
|
20 |
+
import spatialmedia.mpeg.constants
|
21 |
+
import spatialmedia.mpeg.container
|
22 |
+
import spatialmedia.mpeg.mpeg4_container
|
23 |
+
|
24 |
+
# Package-level conveniences. The submodule names used below are bound in
# this namespace by the ``import spatialmedia.mpeg.<name>`` statements above.
load = mpeg4_container.load

Box = box.Box
SA3DBox = sa3d.SA3DBox
Container = container.Container
Mpeg4Container = mpeg4_container.Mpeg4Container

# Submodules exported by ``from spatialmedia.mpeg import *``. Every entry
# must name an existing submodule; the former "mpeg4" entry did not and made
# the star-import fail.
__all__ = ["box", "mpeg4_container", "container", "constants", "sa3d"]
|
spatialmedia/mpeg/box.py
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
"""MPEG processing classes.
|
19 |
+
|
20 |
+
Tool for loading mpeg4 files and manipulating atoms.
|
21 |
+
"""
|
22 |
+
|
23 |
+
import io
|
24 |
+
import struct
|
25 |
+
|
26 |
+
from spatialmedia.mpeg import constants
|
27 |
+
|
28 |
+
def load(fh, position, end):
    """Loads the box located at a position in a mp4 file.

    Only the header is parsed; the payload is left on disk and the
    returned box has ``contents`` set to None.

    Args:
      fh: file handle, input file handle.
      position: int or None, offset of the box. None means the current
          position of ``fh``.
      end: int, offset the box is not allowed to extend past.

    Returns:
      box: box, box from loaded file location or None when the declared
          size is below 8 or the box would extend past ``end``.
    """
    start = fh.tell() if position is None else position
    fh.seek(start)

    (declared_size,) = struct.unpack(">I", fh.read(4))
    tag = fh.read(4)

    header_length = 8
    if declared_size == 1:
        # A 32-bit size of 1 is followed by the real size as 64 bits.
        (declared_size,) = struct.unpack(">Q", fh.read(8))
        header_length = 16

    if declared_size < 8:
        print("Error, invalid size {} in {} at {}".format(
            declared_size, tag, start))
        return None

    if start + declared_size > end:
        print("Error: Leaf box size exceeds bounds.")
        return None

    leaf = Box()
    leaf.name = tag
    leaf.position = start
    leaf.header_size = header_length
    leaf.content_size = declared_size - header_length
    leaf.contents = None
    return leaf
|
66 |
+
|
67 |
+
|
68 |
+
class Box(object):
    """MPEG4 box contents and behaviour true for all boxes."""

    def __init__(self):
        self.name = ""          # 4-byte box tag
        self.position = 0       # offset of the box header in the source file
        self.header_size = 0    # 8, or 16 when a 64-bit size is used
        self.content_size = 0   # payload size in bytes, header excluded
        self.contents = None    # cached payload; None means "read from file"

    def content_start(self):
        """Returns the source-file offset of the first payload byte."""
        return self.position + self.header_size

    def save(self, in_fh, out_fh, delta):
        """Save box contents prioritizing set contents.

        Args:
          in_fh: file handle, source to read box contents from.
          out_fh: file handle, destination for written box contents.
          delta: int, index update amount.
        """
        if self.header_size == 16:
            # 64-bit form: 32-bit size field of 1, tag, then the real size.
            out_fh.write(struct.pack(">I", 1))
            out_fh.write(self.name)
            out_fh.write(struct.pack(">Q", self.size()))
        elif self.header_size == 8:
            out_fh.write(struct.pack(">I", self.size()))
            out_fh.write(self.name)

        if self.content_start():
            in_fh.seek(self.content_start())

        # Chunk offset tables are rewritten with ``delta`` applied; any
        # other box is copied from the cached contents or from the source.
        if self.name == constants.TAG_STCO:
            stco_copy(in_fh, out_fh, self, delta)
        elif self.name == constants.TAG_CO64:
            co64_copy(in_fh, out_fh, self, delta)
        elif self.contents:
            out_fh.write(self.contents)
        else:
            tag_copy(in_fh, out_fh, self.content_size)

    def set(self, new_contents):
        """Sets / overwrites the box contents.

        Args:
          new_contents: sized payload (e.g. bytes) to cache in the box;
              content_size is updated to its length.
        """
        self.contents = new_contents
        # Previously measured an undefined name and raised NameError.
        self.content_size = len(new_contents)

    def size(self):
        """Total size of a box.

        Returns:
          Int, total size in bytes of the box.
        """
        return self.header_size + self.content_size

    def print_structure(self, indent=""):
        """Prints the box structure."""
        size1 = self.header_size
        size2 = self.content_size
        print("{0} {1} [{2}, {3}]".format(indent, self.name, size1, size2))
|
127 |
+
|
128 |
+
|
129 |
+
def tag_copy(in_fh, out_fh, size):
    """Copies a block of data from in_fh to out_fh.

    Data is read from the current position of in_fh.

    Args:
      in_fh: file handle, source of uncached file contents.
      out_fh: file handle, destination for saved file.
      size: int, amount of data to copy.
    """
    # On 32-bit systems reading / writing is limited to 2GB chunks.
    # To prevent overflow, read/write 64 MB chunks.
    chunk_limit = 64 * 1024 * 1024
    remaining = size
    while remaining > chunk_limit:
        out_fh.write(in_fh.read(chunk_limit))
        remaining -= chunk_limit

    # Final (possibly only) piece, at most one chunk long.
    out_fh.write(in_fh.read(remaining))
|
148 |
+
|
149 |
+
|
150 |
+
def index_copy(in_fh, out_fh, box, mode, mode_length, delta=0):
    """Update and copy index table for stco/co64 files.

    The table is read from the box's cached contents when present,
    otherwise from in_fh at the box's content start. It consists of two
    big-endian 32-bit words (copied unchanged, the second being the number
    of entries) followed by that many entries, each written back with
    ``delta`` added.

    Args:
      in_fh: file handle, source to read index table from.
      out_fh: file handle, destination for index file.
      box: box, stco/co64 box to copy.
      mode: string, bit packing mode for index entries.
      mode_length: int, number of bytes for index entries.
      delta: int, offset change for index entries.
    """
    if box.contents:
        source = io.BytesIO(box.contents)
    else:
        source = in_fh
        source.seek(box.content_start())

    (header,) = struct.unpack(">I", source.read(4))
    (entry_count,) = struct.unpack(">I", source.read(4))

    pieces = [struct.pack(">I", header), struct.pack(">I", entry_count)]
    for _ in range(entry_count):
        (offset,) = struct.unpack(mode, source.read(mode_length))
        pieces.append(struct.pack(mode, offset + delta))

    # Single write so nothing is emitted if the table fails to parse.
    out_fh.write(b"".join(pieces))
|
178 |
+
|
179 |
+
|
180 |
+
def stco_copy(in_fh, out_fh, box, delta=0):
    """Copy for stco box.

    Entries are handled as 4-byte big-endian unsigned integers.

    Args:
      in_fh: file handle, source to read index table from.
      out_fh: file handle, destination for index file.
      box: box, stco box to copy.
      delta: int, offset change for index entries.
    """
    index_copy(in_fh, out_fh, box, mode=">I", mode_length=4, delta=delta)
|
190 |
+
|
191 |
+
|
192 |
+
def co64_copy(in_fh, out_fh, box, delta=0):
    """Copy for co64 box.

    Entries are handled as 8-byte big-endian unsigned integers.

    Args:
      in_fh: file handle, source to read index table from.
      out_fh: file handle, destination for index file.
      box: box, co64 box to copy.
      delta: int, offset change for index entries.
    """
    index_copy(in_fh, out_fh, box, mode=">Q", mode_length=8, delta=delta)
|
spatialmedia/mpeg/constants.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
"""MPEG-4 constants."""
|
19 |
+
|
20 |
+
TRAK_TYPE_VIDE = b"vide"

# Leaf types.
TAG_STCO = b"stco"
TAG_CO64 = b"co64"
TAG_FREE = b"free"
TAG_MDAT = b"mdat"
TAG_XML = b"xml "
TAG_HDLR = b"hdlr"
TAG_FTYP = b"ftyp"
TAG_ESDS = b"esds"
TAG_SOUN = b"soun"
TAG_SA3D = b"SA3D"

# Container types.
TAG_MOOV = b"moov"
TAG_UDTA = b"udta"
TAG_META = b"meta"
TAG_TRAK = b"trak"
TAG_MDIA = b"mdia"
TAG_MINF = b"minf"
TAG_STBL = b"stbl"
TAG_STSD = b"stsd"
TAG_UUID = b"uuid"
TAG_WAVE = b"wave"

# Sound sample descriptions.
TAG_NONE = b"NONE"
TAG_RAW_ = b"raw "
TAG_TWOS = b"twos"
TAG_SOWT = b"sowt"
TAG_FL32 = b"fl32"
TAG_FL64 = b"fl64"
TAG_IN24 = b"in24"
TAG_IN32 = b"in32"
TAG_ULAW = b"ulaw"
TAG_ALAW = b"alaw"
TAG_LPCM = b"lpcm"
TAG_MP4A = b"mp4a"
TAG_OPUS = b"Opus"

# Tags recognised as sound sample description entries.
SOUND_SAMPLE_DESCRIPTIONS = frozenset((
    TAG_NONE, TAG_RAW_, TAG_TWOS, TAG_SOWT,
    TAG_FL32, TAG_FL64, TAG_IN24, TAG_IN32,
    TAG_ULAW, TAG_ALAW, TAG_LPCM, TAG_MP4A,
    TAG_OPUS,
))

# Tags parsed as containers: the structural boxes listed here plus every
# sound sample description tag. TAG_META and TAG_UUID are not included.
CONTAINERS_LIST = SOUND_SAMPLE_DESCRIPTIONS | frozenset((
    TAG_MDIA, TAG_MINF, TAG_MOOV, TAG_STBL,
    TAG_STSD, TAG_TRAK, TAG_UDTA, TAG_WAVE,
))
|
spatialmedia/mpeg/container.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
"""MPEG processing classes.
|
19 |
+
|
20 |
+
Functions for loading MPEG files and manipulating boxes.
|
21 |
+
"""
|
22 |
+
|
23 |
+
import struct
|
24 |
+
|
25 |
+
from spatialmedia.mpeg import box
|
26 |
+
from spatialmedia.mpeg import constants
|
27 |
+
from spatialmedia.mpeg import sa3d
|
28 |
+
|
29 |
+
def load(fh, position, end):
    """Loads the box at a position, recursing into container boxes.

    Tags that are not in constants.CONTAINERS_LIST (and the 12-byte mp4a
    box) are delegated to the leaf loaders; everything else becomes a
    Container whose children are loaded recursively.

    Args:
      fh: file handle, input file handle.
      position: int or None, offset of the box. None means the current
          position of ``fh``.
      end: int, offset the box is not allowed to extend past.

    Returns:
      The loaded box / container, or None when the box is invalid, exceeds
      ``end``, or one of its children fails to load.
    """
    start = fh.tell() if position is None else position
    fh.seek(start)

    (declared_size,) = struct.unpack(">I", fh.read(4))
    tag = fh.read(4)

    # Handle the mp4a decompressor setting (wave -> mp4a).
    is_leaf = (tag not in constants.CONTAINERS_LIST
               or (tag == constants.TAG_MP4A and declared_size == 12))
    if is_leaf:
        leaf_loader = sa3d.load if tag == constants.TAG_SA3D else box.load
        return leaf_loader(fh, start, end)

    header_length = 8
    if declared_size == 1:
        # A 32-bit size of 1 is followed by the real size as 64 bits.
        (declared_size,) = struct.unpack(">Q", fh.read(8))
        header_length = 16

    if declared_size < 8:
        print("Error, invalid size", declared_size, "in", tag, "at", start)
        return None

    if start + declared_size > end:
        print("Error: Container box size exceeds bounds.")
        return None

    # Bytes between the header and the first child box.
    skip = 0
    if tag == constants.TAG_STSD:
        skip = 8
    if tag in constants.SOUND_SAMPLE_DESCRIPTIONS:
        # Peek at the 16-bit version stored 8 bytes past the header, then
        # restore the file position.
        resume_at = fh.tell()
        fh.seek(resume_at + 8)
        (version,) = struct.unpack(">h", fh.read(2))
        fh.seek(resume_at)

        skip_by_version = {0: 28, 1: 28 + 16, 2: 64}
        if version in skip_by_version:
            skip = skip_by_version[version]
        else:
            print("Unsupported sample description version:",
                  version)

    parsed = Container()
    parsed.name = tag
    parsed.position = start
    parsed.header_size = header_length
    parsed.content_size = declared_size - header_length
    parsed.padding = skip
    parsed.contents = load_multiple(
        fh, start + header_length + skip, start + declared_size)

    return None if parsed.contents is None else parsed
|
91 |
+
|
92 |
+
|
93 |
+
def load_multiple(fh, position=None, end=None):
    """Loads the consecutive boxes found between two file offsets.

    Args:
      fh: file handle, input file handle.
      position: int or None, offset of the first box. None means the
          current position of ``fh``.
      end: int or None, offset at which loading stops. None means the end
          of the stream.

    Returns:
      List of loaded boxes (possibly empty), or None as soon as one box
      fails to load.
    """
    # The declared defaults used to fall straight into ``None + 4`` and
    # raise TypeError; resolve them to concrete offsets first.
    if position is None:
        position = fh.tell()
    if end is None:
        resume_at = fh.tell()
        fh.seek(0, 2)
        end = fh.tell()
        fh.seek(resume_at)

    loaded = []
    # Stop once 4 or fewer bytes remain before ``end``.
    while position + 4 < end:
        new_box = load(fh, position, end)
        if new_box is None:
            print("Error, failed to load box.")
            return None
        loaded.append(new_box)
        position = new_box.position + new_box.size()

    return loaded
|
104 |
+
|
105 |
+
|
106 |
+
class Container(box.Box):
    """MPEG4 container box contents / behaviour."""

    def __init__(self, padding=0):
        self.name = ""
        self.position = 0
        self.header_size = 0
        self.content_size = 0
        self.contents = list()   # child boxes, in file order
        self.padding = padding   # bytes between the header and first child

    def resize(self):
        """Recomputes the box size and recurses on contents."""
        self.content_size = self.padding
        for element in self.contents:
            if isinstance(element, Container):
                element.resize()
            self.content_size += element.size()

    def print_structure(self, indent=""):
        """Prints the box structure and recurses on contents."""
        size1 = self.header_size
        size2 = self.content_size
        print("{0} {1} [{2}, {3}]".format(indent, self.name, size1, size2))

        last_index = len(self.contents) - 1
        for i, element in enumerate(self.contents):
            # Turn this box's own connector into a continuation column:
            # a branch becomes a vertical bar, a corner and dashes vanish.
            next_indent = indent
            next_indent = next_indent.replace("├", "│")
            next_indent = next_indent.replace("└", " ")
            next_indent = next_indent.replace("─", " ")

            if i == last_index:
                next_indent = next_indent + " └──"
            else:
                next_indent = next_indent + " ├──"

            element.print_structure(next_indent)

    def remove(self, tag):
        """Removes a tag recursively from all containers."""
        new_contents = []
        # Start from the fixed padding, as resize() does, so the size stays
        # valid for containers whose children do not start at the header.
        self.content_size = self.padding
        for element in self.contents:
            if element.name != tag:
                new_contents.append(element)
                if isinstance(element, Container):
                    element.remove(tag)
                self.content_size += element.size()
        self.contents = new_contents

    def add(self, element):
        """Adds an element, merging with containers of the same type.

        Returns:
          Bool, True when the element was appended or merged, False when a
          leaf with the same name already exists.
        """
        for content in self.contents:
            if content.name == element.name:
                # Only containers can absorb another box of the same name.
                if isinstance(content, Container):
                    return content.merge(element)
                print("Error, cannot merge leafs.")
                return False

        self.contents.append(element)
        return True

    def merge(self, element):
        """Merges structure with container.

        Args:
          element: Container with the same name whose children are added
              to this container.

        Returns:
          Bool, True when every child was added, False on the first
          failure.
        """
        assert(self.name == element.name)
        assert(isinstance(element, Container))
        for sub_element in element.contents:
            if not self.add(sub_element):
                return False

        return True

    def save(self, in_fh, out_fh, delta):
        """Saves box to out_fh reading uncached content from in_fh.

        Args:
          in_fh: file handle, source of uncached file contents.
          out_fh: file handle, destination for saved file.
          delta: int, file change size for updating stco and co64 files.
        """
        if self.header_size == 16:
            # 64-bit form: 32-bit size field of 1, tag, then the real size.
            out_fh.write(struct.pack(">I", 1))
            out_fh.write(self.name)
            out_fh.write(struct.pack(">Q", self.size()))
        elif self.header_size == 8:
            out_fh.write(struct.pack(">I", self.size()))
            out_fh.write(self.name)

        if self.padding > 0:
            # The padding bytes are copied verbatim from the source file.
            in_fh.seek(self.content_start())
            box.tag_copy(in_fh, out_fh, self.padding)

        for element in self.contents:
            element.save(in_fh, out_fh, delta)
|
spatialmedia/mpeg/mpeg4_container.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
"""MPEG4 processing classes.
|
19 |
+
|
20 |
+
Functions for loading MP4/MOV files and manipulating boxes.
|
21 |
+
"""
|
22 |
+
|
23 |
+
from spatialmedia.mpeg import box
|
24 |
+
from spatialmedia.mpeg import constants
|
25 |
+
from spatialmedia.mpeg import container
|
26 |
+
|
27 |
+
|
28 |
+
def load(fh):
    """Load the mpeg4 file structure of a file.

    The whole stream, from offset 0 to its end, is parsed.

    Args:
      fh: file handle, input file handle.

    Returns:
      mpeg4, the loaded mpeg4 structure, or None when the boxes cannot be
      loaded, no box is found, or the moov / mdat box is missing.
    """
    fh.seek(0, 2)
    size = fh.tell()
    contents = container.load_multiple(fh, 0, size)

    # None signals a parse failure; an empty list means nothing was found.
    # (A single ``not contents`` test used to hide the second case.)
    if contents is None:
        print("Error, failed to load .mp4 file.")
        return None
    if not contents:
        print("Error, no boxes found.")
        return None

    loaded_mpeg4 = Mpeg4Container()
    loaded_mpeg4.contents = contents

    for element in loaded_mpeg4.contents:
        if (element.name == constants.TAG_MOOV):
            loaded_mpeg4.moov_box = element
        if (element.name == constants.TAG_FREE):
            loaded_mpeg4.free_box = element
        # Only the first mdat box is remembered.
        if (element.name == constants.TAG_MDAT
                and not loaded_mpeg4.first_mdat_box):
            loaded_mpeg4.first_mdat_box = element
        if (element.name == constants.TAG_FTYP):
            loaded_mpeg4.ftyp_box = element

    if not loaded_mpeg4.moov_box:
        print("Error, file does not contain moov box.")
        return None

    if not loaded_mpeg4.first_mdat_box:
        print("Error, file does not contain mdat box.")
        return None

    # Offset of the first mdat payload (just past its header).
    loaded_mpeg4.first_mdat_position = \
        loaded_mpeg4.first_mdat_box.position
    loaded_mpeg4.first_mdat_position += \
        loaded_mpeg4.first_mdat_box.header_size

    loaded_mpeg4.content_size = 0
    for element in loaded_mpeg4.contents:
        loaded_mpeg4.content_size += element.size()

    return loaded_mpeg4
|
83 |
+
|
84 |
+
|
85 |
+
class Mpeg4Container(container.Container):
    """Specialized behaviour for the root mpeg4 container."""

    def __init__(self):
        self.contents = list()
        self.content_size = 0
        self.header_size = 0
        self.moov_box = None
        self.free_box = None
        self.first_mdat_box = None
        self.ftyp_box = None
        self.first_mdat_position = None   # offset of the first mdat payload
        self.padding = 0

    def merge(self, element):
        """Mpeg4 containers do not support merging."""
        print("Cannot merge mpeg4 files")
        # NOTE(review): terminates the interpreter with status 0 even though
        # this is an error path; kept as-is for existing callers.
        exit(0)

    def print_structure(self):
        """Print mpeg4 file structure recursively."""
        print("mpeg4 [{}]".format(self.content_size))

        last_index = len(self.contents) - 1
        for i, element in enumerate(self.contents):
            # The last child gets a corner connector, the others a branch.
            next_indent = " ├──"
            if i == last_index:
                next_indent = " └──"

            element.print_structure(next_indent)

    def save(self, in_fh, out_fh):
        """Save mpeg4 filecontent to file.

        Args:
          in_fh: file handle, source file handle for uncached contents.
          out_fh: file handle, destination file handle for saved file.
        """
        self.resize()

        # Where the first mdat payload will land in the output: the sizes
        # of everything before the first mdat plus that mdat's header.
        new_position = 0
        for element in self.contents:
            if element.name == constants.TAG_MDAT:
                new_position += element.header_size
                break
            new_position += element.size()

        # Shift handed to every box so offset tables can be adjusted.
        delta = new_position - self.first_mdat_position

        for element in self.contents:
            element.save(in_fh, out_fh, delta)
|
spatialmedia/mpeg/sa3d.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#! /usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
# Copyright 2016 Google Inc. All rights reserved.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
|
18 |
+
"""MPEG SA3D box processing classes.
|
19 |
+
|
20 |
+
Enables the injection of an SA3D MPEG-4. The SA3D box specification
|
21 |
+
conforms to that outlined in docs/spatial-audio-rfc.md
|
22 |
+
"""
|
23 |
+
|
24 |
+
import struct
|
25 |
+
|
26 |
+
from spatialmedia.mpeg import box
|
27 |
+
from spatialmedia.mpeg import constants
|
28 |
+
|
29 |
+
|
30 |
+
def load(fh, position=None, end=None):
    """ Loads the SA3D box located at position in an mp4 file.

    Args:
      fh: file handle, input file handle.
      position: int or None, offset of the box. None means the current
          position of ``fh``.
      end: int or None, offset the box must not extend past. None
          disables the bounds check.

    Returns:
      new_box: box, SA3D box loaded from the file location or None when
          the tag is not SA3D or the box exceeds ``end``.
    """
    if position is None:
        position = fh.tell()

    fh.seek(position)
    new_box = SA3DBox()
    new_box.position = position
    size = struct.unpack(">I", fh.read(4))[0]
    name = fh.read(4)

    if name != constants.TAG_SA3D:
        print("Error: box is not an SA3D box.")
        return None

    # Comparing against the default ``end=None`` used to raise TypeError.
    if end is not None and position + size > end:
        print("Error: SA3D box size exceeds bounds.")
        return None

    new_box.content_size = size - new_box.header_size
    new_box.version = struct.unpack(">B", fh.read(1))[0]

    # One byte carries both fields: the top bit is the head-locked stereo
    # flag, the low seven bits are the ambisonic type.
    packed_type = struct.unpack(">B", fh.read(1))[0]
    new_box.head_locked_stereo = (packed_type & 0x80) != 0
    new_box.ambisonic_type = packed_type & 0x7F

    new_box.ambisonic_order = struct.unpack(">I", fh.read(4))[0]
    new_box.ambisonic_channel_ordering = struct.unpack(">B", fh.read(1))[0]
    new_box.ambisonic_normalization = struct.unpack(">B", fh.read(1))[0]
    new_box.num_channels = struct.unpack(">I", fh.read(4))[0]
    for _ in range(new_box.num_channels):
        new_box.channel_map.append(
            struct.unpack(">I", fh.read(4))[0])
    return new_box
|
70 |
+
|
71 |
+
|
72 |
+
class SA3DBox(box.Box):
    """In-memory representation and serialiser for a spatial audio (SA3D) box.

    The payload written by `save` is, in order: version (uint8), ambisonic
    type (uint8, top bit doubles as the head-locked-stereo flag), ambisonic
    order (uint32), channel ordering (uint8), normalization (uint8), number
    of channels (uint32) and one uint32 per channel-map entry. All integers
    are big-endian.
    """

    # Name -> numeric code tables. The numeric code is what gets written to
    # the file; the name is what callers pass in / what gets printed.
    ambisonic_types = {'periphonic': 0}
    ambisonic_orderings = {'ACN': 0}
    ambisonic_normalizations = {'SN3D': 0}

    # The ambisonic type shares its byte with the head-locked-stereo flag:
    # the top bit is the flag, the low seven bits are the type code.
    _HEAD_LOCKED_STEREO_BIT = 0x80
    _AMBISONIC_TYPE_MASK = 0x7F

    def __init__(self):
        """Initialise an empty SA3D box with an 8-byte header."""
        box.Box.__init__(self)
        self.name = constants.TAG_SA3D
        self.header_size = 8
        self.version = 0
        self.ambisonic_type = 0
        self.head_locked_stereo = False
        self.ambisonic_order = 0
        self.ambisonic_channel_ordering = 0
        self.ambisonic_normalization = 0
        self.num_channels = 0
        self.channel_map = list()

    @staticmethod
    def create(num_channels, audio_metadata):
        """Build an SA3D box from a channel count and a metadata mapping.

        Args:
            num_channels: value stored in the box's channel-count field.
            audio_metadata: mapping with the keys "ambisonic_type",
                "head_locked_stereo", "ambisonic_order",
                "ambisonic_channel_ordering", "ambisonic_normalization"
                and "channel_map". The type / ordering / normalization
                entries are names that are translated through the class
                lookup tables.

        Returns:
            A new SA3DBox whose content_size has been increased by the size
            of every payload field.

        Raises:
            KeyError: if a required key is missing from audio_metadata or a
                name is not present in the corresponding lookup table.
        """
        new_box = SA3DBox()
        # header_size, name and version keep the values set by __init__
        # (8, TAG_SA3D and 0 respectively).
        new_box.content_size += 1  # version, uint8

        new_box.ambisonic_type = SA3DBox.ambisonic_types[
            audio_metadata["ambisonic_type"]]
        # The head-locked flag is packed into the ambisonic type byte on
        # save, so it does not contribute any bytes of its own.
        new_box.head_locked_stereo = audio_metadata["head_locked_stereo"]
        new_box.content_size += 1  # ambisonic type, uint8

        new_box.ambisonic_order = audio_metadata["ambisonic_order"]
        new_box.content_size += 4  # ambisonic order, uint32

        new_box.ambisonic_channel_ordering = SA3DBox.ambisonic_orderings[
            audio_metadata["ambisonic_channel_ordering"]]
        new_box.content_size += 1  # channel ordering, uint8

        new_box.ambisonic_normalization = SA3DBox.ambisonic_normalizations[
            audio_metadata["ambisonic_normalization"]]
        new_box.content_size += 1  # normalization, uint8

        new_box.num_channels = num_channels
        new_box.content_size += 4  # channel count, uint32

        # channel_map starts out empty, so after extend() its length is
        # exactly the number of entries copied; each one is a uint32.
        new_box.channel_map.extend(audio_metadata["channel_map"])
        new_box.content_size += 4 * len(new_box.channel_map)
        return new_box

    @staticmethod
    def _name_for_code(table, code):
        """Return the first name in `table` whose value equals `code`.

        Raises:
            StopIteration: if no entry in `table` maps to `code`.
        """
        return next(name for name, value in table.items() if value == code)

    def ambisonic_type_name(self):
        """Return the name of this box's ambisonic type code."""
        return SA3DBox._name_for_code(
            SA3DBox.ambisonic_types, self.ambisonic_type)

    def ambisonic_channel_ordering_name(self):
        """Return the name of this box's channel ordering code."""
        return SA3DBox._name_for_code(
            SA3DBox.ambisonic_orderings, self.ambisonic_channel_ordering)

    def ambisonic_normalization_name(self):
        """Return the name of this box's normalization code."""
        return SA3DBox._name_for_code(
            SA3DBox.ambisonic_normalizations, self.ambisonic_normalization)

    def print_box(self, console):
        """ Prints the contents of this spatial audio (SA3D) box to the
            console.

        Args:
            console: callable invoked once per output line with a string.
        """
        ambisonic_type = self.ambisonic_type_name()
        channel_ordering = self.ambisonic_channel_ordering_name()
        ambisonic_normalization = self.ambisonic_normalization_name()
        console("\t\tAmbisonic Type: %s" % ambisonic_type)
        console("\t\tContains Head-Locked Stereo: %r" % self.head_locked_stereo)
        console("\t\tAmbisonic Order: %d" % self.ambisonic_order)
        console("\t\tAmbisonic Channel Ordering: %s" % channel_ordering)
        console("\t\tAmbisonic Normalization: %s" % ambisonic_normalization)
        console("\t\tNumber of Channels: %d" % self.num_channels)
        console("\t\tChannel Map: %s" % str(self.channel_map))

    def get_metadata_string(self):
        """ Outputs a concise single line audio metadata string. """
        return "%s, %s, %s, Order %d, %d Channel(s), Channel Map: %s" % (
            self.ambisonic_normalization_name(),
            self.ambisonic_channel_ordering_name(),
            self.ambisonic_type_name(),
            self.ambisonic_order,
            self.num_channels,
            str(self.channel_map))

    def save(self, in_fh, out_fh, delta):
        """Serialise this box (header followed by payload) to out_fh.

        Args:
            in_fh: not used by this method.
            out_fh: binary file-like object the box is written to.
            delta: not used by this method.
        """
        # A 16-byte header stores 1 in the 32-bit size slot and the real
        # size as a 64-bit value; an 8-byte header stores the size directly.
        # NOTE(review): any other header_size writes no header at all;
        # confirm whether that should be an error.
        if self.header_size == 16:
            out_fh.write(struct.pack(">I", 1))
            out_fh.write(struct.pack(">Q", self.size()))
            out_fh.write(self.name)
        elif self.header_size == 8:
            out_fh.write(struct.pack(">I", self.size()))
            out_fh.write(self.name)

        # Fold the head-locked-stereo flag into the top bit of the type byte.
        if self.head_locked_stereo:
            ambisonic_type = (
                self.ambisonic_type | SA3DBox._HEAD_LOCKED_STEREO_BIT)
        else:
            ambisonic_type = self.ambisonic_type & SA3DBox._AMBISONIC_TYPE_MASK

        out_fh.write(struct.pack(">B", self.version))
        out_fh.write(struct.pack(">B", ambisonic_type))
        out_fh.write(struct.pack(">I", self.ambisonic_order))
        out_fh.write(struct.pack(">B", self.ambisonic_channel_ordering))
        out_fh.write(struct.pack(">B", self.ambisonic_normalization))
        out_fh.write(struct.pack(">I", self.num_channels))
        # NOTE(review): None entries are skipped here, but create() counts
        # every channel-map entry in content_size; verify that a map
        # containing None cannot reach this point.
        for channel in self.channel_map:
            if channel is not None:
                out_fh.write(struct.pack(">I", int(channel)))
|