Spaces:

SixOpen
/

360-video-injector

Sleeping

App Files Files Community

ender committed on May 18, 2024

Commit

a92043d

1 Parent(s): 52d4758

HF Ready

Browse files

Files changed (9) hide show

README.md +5 -5
app.py +70 -0
spatialmedia/metadata_utils.py +667 -0
spatialmedia/mpeg/__init__.py +31 -0
spatialmedia/mpeg/box.py +201 -0
spatialmedia/mpeg/constants.py +86 -0
spatialmedia/mpeg/container.py +210 -0
spatialmedia/mpeg/mpeg4_container.py +133 -0
spatialmedia/mpeg/sa3d.py +176 -0

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: 360 Video Injector
-emoji: 🌖
-colorFrom: pink
-colorTo: red
 sdk: gradio
-sdk_version: 4.31.4
 app_file: app.py
 pinned: false
 ---

 ---
+title: 360metadata
+emoji: 🏃
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 4.29.0
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+from spatialmedia import metadata_utils
+import os
+import shutil
def console_print(message):
    """Write ``message`` to standard output.

    Used as the ``console`` callback handed to the ``metadata_utils``
    functions.
    """
    print(message)
def inject_360_metadata(input_video, stereo_mode, spatial_audio, crop):
    """Inject spherical (and optionally spatial-audio) metadata into a video.

    Args:
      input_video: bytes or None, raw contents of the uploaded file.
      stereo_mode: string, "none", "top-bottom" or "left-right".
      spatial_audio: bool, also tag the audio track as ambisonic when True.
      crop: string, optional crop region "w:h:f_w:f_h:x:y"; empty for none.

    Returns:
      Path of the injected output file, or None when no video was supplied.

    Raises:
      ValueError: if the metadata could not be generated, the audio layout
        is not a supported spatial format, or no output file was produced.
    """
    if input_video is None:
        return None
    # Function-scope import so this block does not depend on a file-level one.
    import tempfile

    base_dir = "processed_videos"
    os.makedirs(base_dir, exist_ok=True)
    # One private directory per request: with fixed file names, concurrent
    # requests would overwrite each other's input and output files.
    work_dir = tempfile.mkdtemp(dir=base_dir)
    input_video_path = os.path.join(work_dir, "input_video.mp4")
    output_video_path = os.path.join(work_dir, "output_video.mp4")
    with open(input_video_path, 'wb') as f:
        f.write(input_video)
    try:
        metadata = metadata_utils.Metadata()
        # generate_spherical_xml returns False for an invalid crop string.
        metadata.video = metadata_utils.generate_spherical_xml(stereo_mode, crop)
        if not metadata.video:
            raise ValueError("Failed to generate metadata.")
        if spatial_audio and not metadata.audio:
            parsed_metadata = metadata_utils.parse_metadata(input_video_path, console_print)
            # parse_metadata returns None when the file cannot be parsed.
            if parsed_metadata is None:
                raise ValueError("Could not read the audio track of the uploaded video.")
            spatial_audio_description = metadata_utils.get_spatial_audio_description(
                parsed_metadata.num_audio_channels)
            if not spatial_audio_description.is_supported:
                raise ValueError("Audio has %d channel(s) and isn't a supported spatial audio format." % (parsed_metadata.num_audio_channels))
            metadata.audio = metadata_utils.get_spatial_audio_metadata(
                spatial_audio_description.order,
                spatial_audio_description.has_head_locked_stereo)
        metadata_utils.inject_metadata(input_video_path, output_video_path, metadata, console_print)
    finally:
        # The uploaded copy is only needed while injecting.
        os.remove(input_video_path)
    # inject_metadata reports problems through the console callback only, so
    # confirm that a file was actually written before returning its path.
    if not os.path.exists(output_video_path):
        raise ValueError("Failed to inject metadata; see the log for details.")
    return output_video_path
def update_output(output_file_path):
    """Return ``output_file_path`` when it names an existing path, else None."""
    if not output_file_path:
        return None
    if not os.path.exists(output_file_path):
        return None
    return output_file_path
def _inject_and_validate(input_video, stereo_mode, spatial_audio, crop):
    """Run the injection and only hand back a path that exists on disk."""
    return update_output(
        inject_360_metadata(input_video, stereo_mode, spatial_audio, crop))


def main():
    """Build the Gradio interface and launch it."""
    with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as demo:
        gr.Markdown("This space adds appropriate metadata to 360° equirectangular videos so they can be recognized as such. The audio checkbox option is only viable if your video has spatial audio in ambiX ACN/SN3D with head-locked stereo")
        with gr.Row():
            video_input = gr.File(label="Select video file", type="binary")
            stereo_dropdown = gr.Dropdown(choices=["none", "top-bottom", "left-right"], label="Stereo Mode")
            spatial_audio_checkbox = gr.Checkbox(label="Spatial Audio")
            crop_input = gr.Textbox(label="Crop Region (w:h:f_w:f_h:x:y)- Optional")
            submit_btn = gr.Button("Inject 360° Metadata")
        output_file = gr.File(label="Download Injected Video", type="filepath", visible=True)
        # `postprocess` on an event listener is a boolean switch, not a
        # callback, so passing update_output there never invoked it. The
        # check is applied by wrapping the handler instead.
        submit_btn.click(
            fn=_inject_and_validate,
            inputs=[video_input, stereo_dropdown, spatial_audio_checkbox, crop_input],
            outputs=output_file,
        )
    demo.launch(share=True)


if __name__ == "__main__":
    main()

spatialmedia/metadata_utils.py ADDED Viewed

	@@ -0,0 +1,667 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities for examining/injecting spatial media metadata in MP4/MOV files."""
+import collections
+import os
+import re
+import struct
+import traceback
+import xml.etree
+import xml.etree.ElementTree
+from spatialmedia import mpeg
# File extensions handled as MPEG-4 containers.
MPEG_FILE_EXTENSIONS = [".mp4", ".mov"]

# 16-byte identifier placed at the start of a spherical uuid box payload.
SPHERICAL_UUID_ID = (
    b"\xff\xcc\x82\x63\xf8\x55\x4a\x93"
    b"\x88\x14\x58\x7a\x02\x52\x1f\xdd"
)

# XML contents.
RDF_PREFIX = ' xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" '

SPHERICAL_XML_HEADER = (
    '<?xml version="1.0"?>'
    '<rdf:SphericalVideo\n'
    'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n'
    'xmlns:GSpherical="http://ns.google.com/videos/1.0/spherical/">'
)

SPHERICAL_XML_CONTENTS = (
    '<GSpherical:Spherical>true</GSpherical:Spherical>'
    '<GSpherical:Stitched>true</GSpherical:Stitched>'
    '<GSpherical:StitchingSoftware>'
    'Spherical Metadata Tool'
    '</GSpherical:StitchingSoftware>'
    '<GSpherical:ProjectionType>equirectangular</GSpherical:ProjectionType>'
)

SPHERICAL_XML_CONTENTS_TOP_BOTTOM = (
    '<GSpherical:StereoMode>top-bottom</GSpherical:StereoMode>'
)

SPHERICAL_XML_CONTENTS_LEFT_RIGHT = (
    '<GSpherical:StereoMode>left-right</GSpherical:StereoMode>'
)

# Parameter order matches that of the crop option.
SPHERICAL_XML_CONTENTS_CROP_FORMAT = (
    '<GSpherical:CroppedAreaImageWidthPixels>{0}'
    '</GSpherical:CroppedAreaImageWidthPixels>'
    '<GSpherical:CroppedAreaImageHeightPixels>{1}'
    '</GSpherical:CroppedAreaImageHeightPixels>'
    '<GSpherical:FullPanoWidthPixels>{2}</GSpherical:FullPanoWidthPixels>'
    '<GSpherical:FullPanoHeightPixels>{3}</GSpherical:FullPanoHeightPixels>'
    '<GSpherical:CroppedAreaLeftPixels>{4}</GSpherical:CroppedAreaLeftPixels>'
    '<GSpherical:CroppedAreaTopPixels>{5}</GSpherical:CroppedAreaTopPixels>'
)

SPHERICAL_XML_FOOTER = '</rdf:SphericalVideo>'

# Tag names (without namespace) that are reported as known when parsing.
SPHERICAL_TAGS_LIST = [
    "Spherical",
    "Stitched",
    "StitchingSoftware",
    "ProjectionType",
    "SourceCount",
    "StereoMode",
    "InitialViewHeadingDegrees",
    "InitialViewPitchDegrees",
    "InitialViewRollDegrees",
    "Timestamp",
    "CroppedAreaImageWidthPixels",
    "CroppedAreaImageHeightPixels",
    "FullPanoWidthPixels",
    "FullPanoHeightPixels",
    "CroppedAreaLeftPixels",
    "CroppedAreaTopPixels",
]
class Metadata:
    """Metadata to be injected into a file."""

    def __init__(self):
        # Spherical video XML string, or None when not set.
        self.video = None
        # Spatial audio description, or None when not set.
        self.audio = None
class ParsedMetadata:
    """Metadata read back from an existing file."""

    def __init__(self):
        # Parsed spherical values, keyed by track label.
        self.video = {}
        # SA3D box found in the audio sample description, if any.
        self.audio = None
        # Channel count read from the audio sample description.
        self.num_audio_channels = 0
# Namespace prefix ElementTree puts in front of every GSpherical tag name.
SPHERICAL_PREFIX = "{http://ns.google.com/videos/1.0/spherical/}"
# Maps the namespace-qualified tag to its short name. Built with a
# comprehension so no loop variable is left behind in the module namespace.
SPHERICAL_TAGS = {
    SPHERICAL_PREFIX + tag_name: tag_name for tag_name in SPHERICAL_TAGS_LIST
}
# One unsigned decimal field. A raw string is required: "\d" in a plain
# string literal is an invalid escape sequence.
integer_regex_group = r"(\d+)"
# Six colon-separated integer fields, matching the crop option.
crop_regex = "^{0}$".format(":".join([integer_regex_group] * 6))
# Highest ambisonic order that get_spatial_audio_description will report.
MAX_SUPPORTED_AMBIX_ORDER = 1

SpatialAudioDescription = collections.namedtuple(
    'SpatialAudioDescription',
    ['order', 'is_supported', 'has_head_locked_stereo'])


def get_spatial_audio_description(num_channels):
    """Describe the spatial audio layout implied by a channel count.

    Args:
      num_channels: int, number of channels in the audio track.

    Returns:
      SpatialAudioDescription. A count of (order + 1)^2 is reported as plain
      ambisonics of that order, and (order + 1)^2 + 2 as ambisonics with
      head-locked stereo. Any other count gives order -1 and
      is_supported False.
    """
    for order in range(1, MAX_SUPPORTED_AMBIX_ORDER + 1):
        ambisonic_channels = (order + 1) * (order + 1)
        if num_channels == ambisonic_channels:
            return SpatialAudioDescription(order, True, False)
        if num_channels == ambisonic_channels + 2:
            return SpatialAudioDescription(order, True, True)
    return SpatialAudioDescription(-1, False, True)
def spherical_uuid(metadata):
    """Builds a uuid box that carries spherical metadata.

    Args:
      metadata: String, xml to store in the box after the spherical id.

    Returns:
      A box named TAG_UUID whose contents are SPHERICAL_UUID_ID followed by
      the UTF-8 encoded xml.
    """
    box = mpeg.Box()
    assert(len(SPHERICAL_UUID_ID) == 16)
    box.name = mpeg.constants.TAG_UUID
    box.header_size = 8
    box.content_size = 0
    payload = SPHERICAL_UUID_ID + metadata.encode("utf-8")
    box.contents = payload
    # The size is recomputed from the final payload.
    box.content_size = len(payload)
    return box
def mpeg4_add_spherical(mpeg4_file, in_fh, metadata):
    """Adds a spherical uuid box to every video track of an mpeg4 file.

    Existing uuid boxes are removed from each track first.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
      in_fh: file handle, Source for uncached file contents.
      metadata: string, xml metadata to inject into spherical tag.

    Returns:
      True after resizing the file structure, or False as soon as a uuid
      box could not be added to a track.
    """
    tags = mpeg.constants
    for trak in mpeg4_file.moov_box.contents:
        if trak.name != tags.TAG_TRAK:
            continue
        trak.remove(tags.TAG_UUID)
        for mdia in trak.contents:
            if mdia.name != tags.TAG_MDIA:
                continue
            is_video = False
            for hdlr in mdia.contents:
                if hdlr.name != tags.TAG_HDLR:
                    continue
                # The handler type is read 8 bytes into the hdlr contents.
                in_fh.seek(hdlr.content_start() + 8)
                if in_fh.read(4) == tags.TRAK_TYPE_VIDE:
                    is_video = True
                    break
            if not is_video:
                continue
            if not trak.add(spherical_uuid(metadata)):
                return False
            break
    mpeg4_file.resize()
    return True
def mpeg4_add_spatial_audio(mpeg4_file, in_fh, audio_metadata, console):
    """Adds spatial audio metadata to the first audio track of the input
       mpeg4_file.

    Args:
      mpeg4_file: mpeg4, Mpeg4 file structure to add metadata.
      in_fh: file handle, Source for uncached file contents.
      audio_metadata: dictionary ('ambisonic_type': string,
      'ambisonic_order': int, 'head_locked_stereo': Bool),
      Supports 'periphonic' ambisonic type only.
      console: callable taking one message string, used for error output.

    Returns:
      The result of inject_spatial_audio_atom for the first sound track
      found, or True when the file has no sound track.
    """
    tags = mpeg.constants
    for trak in mpeg4_file.moov_box.contents:
        if trak.name != tags.TAG_TRAK:
            continue
        for mdia in trak.contents:
            if mdia.name != tags.TAG_MDIA:
                continue
            for hdlr in mdia.contents:
                if hdlr.name != tags.TAG_HDLR:
                    continue
                # The handler type is read 8 bytes into the hdlr contents.
                in_fh.seek(hdlr.content_start() + 8)
                if in_fh.read(4) == tags.TAG_SOUN:
                    return inject_spatial_audio_atom(
                        in_fh, mdia, audio_metadata, console)
    return True
def mpeg4_add_audio_metadata(mpeg4_file, in_fh, audio_metadata, console):
    """Adds spatial audio metadata unless the file has several audio tracks.

    Returns:
      False (after reporting through console) when more than one audio
      track is present, otherwise the result of mpeg4_add_spatial_audio.
    """
    track_count = get_num_audio_tracks(mpeg4_file, in_fh)
    if track_count <= 1:
        return mpeg4_add_spatial_audio(
            mpeg4_file, in_fh, audio_metadata, console)
    console("Error: Expected 1 audio track. Found %d" % track_count)
    return False
def inject_spatial_audio_atom(
    in_fh, audio_media_atom, audio_metadata, console):
    """Appends an SA3D box to the sound sample descriptions of a media atom.

    Walks minf -> stbl -> stsd below audio_media_atom. For each sound sample
    description, the channel count read from the file must equal the count
    expected for audio_metadata.

    Returns:
      False (after reporting through console) on a channel count mismatch,
      True otherwise.
    """
    tags = mpeg.constants
    # Lazily yields every stsd box below the media atom, in file order.
    stsd_boxes = (
        stsd
        for minf in audio_media_atom.contents
        if minf.name == tags.TAG_MINF
        for stbl in minf.contents
        if stbl.name == tags.TAG_STBL
        for stsd in stbl.contents
        if stsd.name == tags.TAG_STSD
    )
    for stsd in stsd_boxes:
        for sample_description in stsd.contents:
            if sample_description.name not in tags.SOUND_SAMPLE_DESCRIPTIONS:
                continue
            in_fh.seek(sample_description.position +
                       sample_description.header_size + 16)
            num_channels = get_num_audio_channels(stsd, in_fh)
            ambisonic_type = audio_metadata["ambisonic_type"]
            ambisonic_order = audio_metadata["ambisonic_order"]
            head_locked = audio_metadata["head_locked_stereo"]
            expected_num_channels = get_expected_num_audio_channels(
                ambisonic_type, ambisonic_order, head_locked)
            if num_channels != expected_num_channels:
                if head_locked:
                    head_locked_stereo_msg = " with head-locked stereo"
                else:
                    head_locked_stereo_msg = ""
                err_msg = ("Error: Found %d audio channel(s). "
                           "Expected %d channel(s) for %s ambisonics "
                           "of order %d%s.") % (
                               num_channels,
                               expected_num_channels,
                               ambisonic_type,
                               ambisonic_order,
                               head_locked_stereo_msg)
                console(err_msg)
                return False
            sample_description.contents.append(
                mpeg.SA3DBox.create(num_channels, audio_metadata))
    return True
def parse_spherical_xml(contents, console):
    """Returns spherical metadata for a set of xml data.

    If the xml does not parse, one retry is made with the rdf namespace
    declaration inserted after "<rdf:SphericalVideo".

    Args:
      contents: string, spherical metadata xml contents.
      console: callable taking one message string, used for all output.

    Returns:
      dictionary containing the parsed spherical metadata values, or None
      when the xml cannot be parsed.
    """
    try:
        parsed_xml = xml.etree.ElementTree.XML(contents)
    except xml.etree.ElementTree.ParseError:
        console(traceback.format_exc())
        console(contents)
        index = contents.find("<rdf:SphericalVideo")
        if index != -1:
            index += len("<rdf:SphericalVideo")
            contents = contents[:index] + RDF_PREFIX + contents[index:]
        try:
            parsed_xml = xml.etree.ElementTree.XML(contents)
        except xml.etree.ElementTree.ParseError:
            console("\t\tParser Error on XML")
            console(traceback.format_exc())
            console(contents)
            return None
        # console takes a single message, so the prefix is concatenated
        # rather than passed as a second argument.
        console("\t\tWarning missing rdf prefix: " + RDF_PREFIX)
    sphericalDictionary = dict()
    for child in list(parsed_xml):
        # An empty element has text None, which cannot be concatenated.
        text = child.text if child.text is not None else ""
        if child.tag in SPHERICAL_TAGS:
            name = SPHERICAL_TAGS[child.tag]
            console("\t\t" + name + " = " + text)
            sphericalDictionary[name] = child.text
        else:
            tag = child.tag
            # Strip the namespace so unknown GSpherical tags print by their
            # short name.
            if tag.startswith(SPHERICAL_PREFIX):
                tag = tag[len(SPHERICAL_PREFIX):]
            console("\t\tUnknown: " + tag + " = " + text)
    return sphericalDictionary
def parse_spherical_mpeg4(mpeg4_file, fh, console):
    """Returns spherical and spatial audio metadata for a loaded mpeg4 file.

    Args:
      mpeg4_file: mpeg4, loaded mpeg4 file contents.
      fh: file handle, file handle for uncached file contents.
      console: callable taking one message string, used for all output.

    Returns:
      ParsedMetadata whose video dict maps "Track N" labels to the parsed
      spherical xml values, with audio and num_audio_channels filled in
      from the sound sample descriptions.
    """
    tags = mpeg.constants
    parsed = ParsedMetadata()
    traks = (box for box in mpeg4_file.moov_box.contents
             if box.name == tags.TAG_TRAK)
    for track_index, trak in enumerate(traks):
        track_label = "Track %d" % track_index
        console("\t%s" % track_label)
        for child in trak.contents:
            if child.name == tags.TAG_UUID:
                cached = child.contents
                # Use the in-memory contents when present, else the file.
                if cached:
                    box_id = cached[:16]
                else:
                    fh.seek(child.content_start())
                    box_id = fh.read(16)
                if box_id == SPHERICAL_UUID_ID:
                    if cached:
                        xml_bytes = cached[16:]
                    else:
                        # Continues from where the id read above stopped.
                        xml_bytes = fh.read(child.content_size - 16)
                    parsed.video[track_label] = parse_spherical_xml(
                        xml_bytes.decode("utf-8"), console)
            if child.name == tags.TAG_MDIA:
                # Lazily yields every stsd box below mdia, in file order.
                stsd_boxes = (
                    stsd
                    for minf in child.contents
                    if minf.name == tags.TAG_MINF
                    for stbl in minf.contents
                    if stbl.name == tags.TAG_STBL
                    for stsd in stbl.contents
                    if stsd.name == tags.TAG_STSD
                )
                for stsd in stsd_boxes:
                    for entry in stsd.contents:
                        if entry.name not in tags.SOUND_SAMPLE_DESCRIPTIONS:
                            continue
                        parsed.num_audio_channels = get_num_audio_channels(
                            stsd, fh)
                        for inner in entry.contents:
                            if inner.name == tags.TAG_SA3D:
                                inner.print_box(console)
                                parsed.audio = inner
    return parsed
def parse_mpeg4(input_file, console):
    """Loads an mpeg4 file and returns its parsed metadata.

    Args:
      input_file: string, path of the file to read.
      console: callable taking one message string, used for all output.

    Returns:
      The result of parse_spherical_mpeg4, or None when the file cannot be
      opened or loaded.
    """
    # Open separately so only a failure to open produces the "does not
    # exist" message; previously that message was unreachable.
    try:
        in_fh = open(input_file, "rb")
    except OSError:
        console("Error \"" + input_file + "\" does not exist or do not have "
                "permission.")
        return None
    with in_fh:
        mpeg4_file = mpeg.load(in_fh)
        if mpeg4_file is None:
            console("Error, file could not be opened.")
            return None
        console("Loaded file...")
        return parse_spherical_mpeg4(mpeg4_file, in_fh, console)
def inject_mpeg4(input_file, output_file, metadata, console):
    """Writes a copy of an mpeg4 file with the given metadata injected.

    Args:
      input_file: string, path of the file to read.
      output_file: string, path of the file to write.
      metadata: object with `video` (xml string) and `audio` attributes.
      console: callable taking one message string, used for all output.

    Returns:
      None. Every failure is reported through console and stops processing
      before the output file is written.
    """
    # Open separately so only a failure to open produces the "does not
    # exist" message; previously that message was unreachable.
    try:
        in_fh = open(input_file, "rb")
    except OSError:
        console("Error file: \"" + input_file + "\" does not exist or do not have "
                "permission.")
        return
    with in_fh:
        mpeg4_file = mpeg.load(in_fh)
        if mpeg4_file is None:
            console("Error file could not be opened.")
            # Nothing can be injected into a file that did not load.
            return
        if not mpeg4_add_spherical(mpeg4_file, in_fh, metadata.video):
            console("Error failed to insert spherical data")
            return
        if metadata.audio:
            if not mpeg4_add_audio_metadata(
                    mpeg4_file, in_fh, metadata.audio, console):
                console("Error failed to insert spatial audio data")
                return
        console("Saved file settings")
        parse_spherical_mpeg4(mpeg4_file, in_fh, console)
        with open(output_file, "wb") as out_fh:
            mpeg4_file.save(in_fh, out_fh)
def parse_metadata(src, console):
    """Parses the spatial media metadata of a file.

    Args:
      src: string, path of the file to read.
      console: callable taking one message string, used for all output.

    Returns:
      The result of parse_mpeg4 for supported extensions, otherwise None
      (unreadable file or unknown file type).
    """
    infile = os.path.abspath(src)
    # Probe readability up front so the error names the resolved path.
    try:
        with open(infile, "rb"):
            pass
    except OSError:
        console("Error: " + infile +
                " does not exist or we do not have permission")
        # Stop here; continuing would only fail again while parsing.
        return None
    console("Processing: " + infile)
    extension = os.path.splitext(infile)[1].lower()
    if extension in MPEG_FILE_EXTENSIONS:
        return parse_mpeg4(infile, console)
    console("Unknown file type")
    return None
def inject_metadata(src, dest, metadata, console):
    """Injects metadata into a copy of a file.

    Args:
      src: string, path of the file to read.
      dest: string, path of the file to write; must differ from src.
      metadata: object with `video` and `audio` attributes to inject.
      console: callable taking one message string, used for all output.

    Returns:
      The string "Input and output cannot be the same" when src and dest
      resolve to the same path, otherwise None.
    """
    infile = os.path.abspath(src)
    outfile = os.path.abspath(dest)
    if infile == outfile:
        same_path_error = "Input and output cannot be the same"
        # Report through console like every other failure; the message is
        # still returned for callers that inspect the return value.
        console("Error: " + same_path_error)
        return same_path_error
    try:
        with open(infile, "rb"):
            pass
    except OSError:
        console("Error: " + infile +
                " does not exist or we do not have permission")
        return
    console("Processing: " + infile)
    extension = os.path.splitext(infile)[1].lower()
    if extension in MPEG_FILE_EXTENSIONS:
        inject_mpeg4(infile, outfile, metadata, console)
        return
    console("Unknown file type")
def generate_spherical_xml(stereo=None, crop=None):
    """Builds the spherical video xml for the given options.

    Args:
      stereo: string or None, "top-bottom" or "left-right" adds the
        matching StereoMode element; any other value adds none.
      crop: string or None, six colon-separated integers in the order
        cropped width, cropped height, full width, full height, left
        offset, top offset. A falsy value adds no crop elements.

    Returns:
      The xml string, or False (after printing an error) when the crop
      parameters are malformed or inconsistent.
    """
    xml_parts = [SPHERICAL_XML_HEADER, SPHERICAL_XML_CONTENTS]
    if stereo == "top-bottom":
        xml_parts.append(SPHERICAL_XML_CONTENTS_TOP_BOTTOM)
    if stereo == "left-right":
        xml_parts.append(SPHERICAL_XML_CONTENTS_LEFT_RIGHT)

    if crop:
        crop_match = re.match(crop_regex, crop)
        if not crop_match:
            print("Error: Invalid crop params: {crop}".format(crop=crop))
            return False
        (crop_w, crop_h, full_w, full_h, left, top) = (
            int(crop_match.group(n)) for n in range(1, 7))

        # This should never happen based on the crop regex.
        if full_w <= 0 or full_h <= 0:
            print("Error with crop params: full pano dimensions are "
                  "invalid: width = {width} height = {height}".format(
                      width=full_w, height=full_h))
            return False

        if (crop_w <= 0 or crop_h <= 0
                or crop_w > full_w or crop_h > full_h):
            print("Error with crop params: cropped area dimensions are "
                  "invalid: width = {width} height = {height}".format(
                      width=crop_w, height=crop_h))
            return False

        # The cropped area must lie entirely inside the full panorama;
        # offsets that would wrap around the domain are not supported.
        total_width = left + crop_w
        total_height = top + crop_h
        if (left < 0 or top < 0
                or total_width > full_w or total_height > full_h):
            print("Error with crop params: cropped area offsets are "
                  "invalid: left = {left} top = {top} "
                  "left+cropped width: {total_width} "
                  "top+cropped height: {total_height}".format(
                      left=left,
                      top=top,
                      total_width=total_width,
                      total_height=total_height))
            return False

        xml_parts.append(SPHERICAL_XML_CONTENTS_CROP_FORMAT.format(
            crop_w, crop_h, full_w, full_h, left, top))

    xml_parts.append(SPHERICAL_XML_FOOTER)
    return "".join(xml_parts)
def get_descriptor_length(in_fh):
    """Derives the length of the MP4 elementary stream descriptor at the
       current position in the input file.

    The length is stored in up to four bytes. Each byte contributes its low
    7 bits, and a set high bit means another length byte follows.

    Args:
      in_fh: file handle positioned at the first length byte.

    Returns:
      The decoded length. The handle is left just past the last length
      byte consumed.

    Raises:
      struct.error: if the file ends before the length is complete.
    """
    descriptor_length = 0
    for _ in range(4):
        size_byte = struct.unpack(">B", in_fh.read(1))[0]
        descriptor_length = (descriptor_length << 7) | (size_byte & 0x7F)
        # Test the continuation bit itself. Comparing the whole byte with
        # 0x80 stopped too early on values such as 0x81.
        if not size_byte & 0x80:
            break
    return descriptor_length
def get_expected_num_audio_channels(
    ambisonics_type, ambisonics_order, head_locked_stereo):
    """ Returns the expected number of audio channels for a given ambisonic
        type and order: (order + 1)^2, plus 2 when head_locked_stereo is
        True. Returns -1 for any type other than 'periphonic'.
    """
    if ambisonics_type != 'periphonic':
        return -1
    ambisonic_channels = (ambisonics_order + 1) * (ambisonics_order + 1)
    if head_locked_stereo == True:
        return ambisonic_channels + 2
    return ambisonic_channels + 0
def get_num_audio_channels(stsd, in_fh):
    """Returns the channel count of the first sound entry in an stsd box.

    Args:
      stsd: box, must be named TAG_STSD.
      in_fh: file handle, Source for uncached file contents.

    Returns:
      The channel count reported by the matching reader, or -1 when stsd is
      not an STSD box or holds no sound sample description.
    """
    tags = mpeg.constants
    if stsd.name != tags.TAG_STSD:
        print("get_num_audio_channels should be given a STSD box")
        return -1
    for entry in stsd.contents:
        # mp4a entries keep the count in the esds descriptor.
        if entry.name == tags.TAG_MP4A:
            return get_aac_num_channels(entry, in_fh)
        if entry.name in tags.SOUND_SAMPLE_DESCRIPTIONS:
            return get_sample_description_num_channels(entry, in_fh)
    return -1
def get_sample_description_num_channels(sample_description, in_fh):
    """Reads the number of audio channels from a sound sample description.

    Args:
      sample_description: box providing content_start() and name.
      in_fh: file handle, Source for uncached file contents.

    Returns:
      The channel count for description versions 0, 1 and 2, or -1 (after
      printing a message) for any other version. The file position is
      restored on every path.

    Raises:
      struct.error: if the file ends before the fields can be read.
    """
    p = in_fh.tell()
    try:
        # The version field is read 8 bytes into the box contents.
        in_fh.seek(sample_description.content_start() + 8)
        version, _revision_level, _vendor = struct.unpack(
            ">hhi", in_fh.read(8))
        if version in (0, 1):
            # Both layouts store the count as the next 16-bit field.
            return struct.unpack(">h", in_fh.read(2))[0]
        if version == 2:
            # Skip four 16-bit fields, two 32-bit fields and the 64-bit
            # sample rate that precede the 32-bit channel count.
            in_fh.seek(24, 1)
            return struct.unpack(">i", in_fh.read(4))[0]
        # Box names read from a binary file are bytes, so format them
        # instead of concatenating with str.
        print("Unsupported version for {!r} box".format(
            sample_description.name))
        return -1
    finally:
        in_fh.seek(p)
def get_aac_num_channels(box, in_fh):
    """Reads the number of audio channels from AAC's AudioSpecificConfig
       descriptor within the esds child box of the input mp4a or wave box.

    Args:
      box: box named TAG_MP4A or TAG_WAVE.
      in_fh: file handle, Source for uncached file contents.

    Returns:
      The channel configuration value, or -1 when the box is of another
      type, has no esds/wave child, or a descriptor cannot be read. The
      file position is restored on every path.
    """
    p = in_fh.tell()
    if box.name not in [mpeg.constants.TAG_MP4A, mpeg.constants.TAG_WAVE]:
        return -1
    # Stays -1 when the loop finds neither an esds nor a wave child;
    # previously the name was unbound in that case.
    channel_configuration = -1
    try:
        for element in box.contents:
            if element.name == mpeg.constants.TAG_WAVE:
                # Handle .mov with AAC audio, where the structure is:
                #     stsd -> mp4a -> wave -> esds
                channel_configuration = get_aac_num_channels(element, in_fh)
                break
            if element.name != mpeg.constants.TAG_ESDS:
                continue
            in_fh.seek(element.content_start() + 4)
            descriptor_tag = struct.unpack(">B", in_fh.read(1))[0]
            # Verify the read descriptor is an elementary stream descriptor
            if descriptor_tag != 3:  # Not an MP4 elementary stream.
                print("Error: failed to read elementary stream descriptor.")
                return -1
            get_descriptor_length(in_fh)
            in_fh.seek(3, 1)  # Seek to the decoder configuration descriptor
            config_descriptor_tag = struct.unpack(">B", in_fh.read(1))[0]
            # Verify the read descriptor is a decoder config. descriptor.
            if config_descriptor_tag != 4:
                print("Error: failed to read decoder config. descriptor.")
                return -1
            get_descriptor_length(in_fh)
            in_fh.seek(13, 1)  # offset to the decoder specific config descriptor.
            decoder_specific_descriptor_tag = struct.unpack(
                ">B", in_fh.read(1))[0]
            # Verify the read descriptor is a decoder specific info descriptor
            if decoder_specific_descriptor_tag != 5:
                print("Error: failed to read MP4 audio decoder specific config.")
                return -1
            audio_specific_descriptor_size = get_descriptor_length(in_fh)
            # Reported like the other malformed-input cases; an assert would
            # be stripped under -O.
            if audio_specific_descriptor_size < 2:
                print("Error: MP4 audio decoder specific config is too short.")
                return -1
            decoder_descriptor = struct.unpack(">H", in_fh.read(2))[0]
            sampling_frequency_index = (0x0780 & decoder_descriptor) >> 7
            if sampling_frequency_index == 0:
                # TODO: If the sample rate is 96kHz an additional 24 bit offset
                # value here specifies the actual sample rate.
                # NOTE(review): confirm that index 0 is the value that
                # signals an explicit sample rate field.
                print("Error: Greater than 48khz audio is currently not supported.")
                return -1
            channel_configuration = (0x0078 & decoder_descriptor) >> 3
    finally:
        # Restore the position on early returns as well.
        in_fh.seek(p)
    return channel_configuration
def get_num_audio_tracks(mpeg4_file, in_fh):
    """ Returns the number of audio tracks in the input mpeg4 file.

    Counts the hdlr boxes under trak -> mdia whose handler type, read from
    in_fh, equals TAG_SOUN.
    """
    tags = mpeg.constants
    # Lazily yields every hdlr box below a trak's mdia, in file order.
    hdlr_boxes = (
        hdlr
        for trak in mpeg4_file.moov_box.contents
        if trak.name == tags.TAG_TRAK
        for mdia in trak.contents
        if mdia.name == tags.TAG_MDIA
        for hdlr in mdia.contents
        if hdlr.name == tags.TAG_HDLR
    )
    num_audio_tracks = 0
    for hdlr in hdlr_boxes:
        # The handler type is read 8 bytes into the hdlr contents.
        in_fh.seek(hdlr.content_start() + 8)
        if in_fh.read(4) == tags.TAG_SOUN:
            num_audio_tracks += 1
    return num_audio_tracks
def get_spatial_audio_metadata(ambisonic_order, head_locked_stereo):
    """Builds the spatial audio metadata dictionary for periphonic audio.

    Args:
      ambisonic_order: int, ambisonic order of the audio.
      head_locked_stereo: bool, whether two head-locked stereo channels
        accompany the ambisonic channels.

    Returns:
      dictionary with the ambisonic order, type, channel ordering ("ACN"),
      normalization ("SN3D"), the head-locked stereo flag, and an identity
      channel map covering every expected channel.
    """
    num_channels = get_expected_num_audio_channels(
        "periphonic", ambisonic_order, head_locked_stereo)
    return {
        "ambisonic_order": ambisonic_order,
        "head_locked_stereo": head_locked_stereo,
        "ambisonic_type": "periphonic",
        "ambisonic_channel_ordering": "ACN",
        "ambisonic_normalization": "SN3D",
        # A real list: in Python 3 a bare range() is a lazy object, not the
        # list that the "channel_map": [] default implied.
        "channel_map": list(range(num_channels)),
    }

spatialmedia/mpeg/__init__.py ADDED Viewed

	@@ -0,0 +1,31 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import spatialmedia.mpeg.sa3d
+import spatialmedia.mpeg.box
+import spatialmedia.mpeg.constants
+import spatialmedia.mpeg.container
+import spatialmedia.mpeg.mpeg4_container
+load = mpeg4_container.load
+Box = box.Box
+SA3DBox = sa3d.SA3DBox
+Container = container.Container
+Mpeg4Container = mpeg4_container.Mpeg4Container
+__all__ = ["box", "mpeg4", "container", "constants", "sa3d"]

spatialmedia/mpeg/box.py ADDED Viewed

	@@ -0,0 +1,201 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MPEG processing classes.
+Tool for loading mpeg4 files and manipulating atoms.
+"""
+import io
+import struct
+from spatialmedia.mpeg import constants
+def load(fh, position, end):
+    """Loads the box located at a position in a mp4 file.
+    Args:
+      fh: file handle, input file handle.
+      position: int or None, current file position.
+    Returns:
+      box: box, box from loaded file location or None.
+    """
+    if position is None:
+        position = fh.tell()
+    fh.seek(position)
+    header_size = 8
+    size = struct.unpack(">I", fh.read(4))[0]
+    name = fh.read(4)
+    if size == 1:
+        size = struct.unpack(">Q", fh.read(8))[0]
+        header_size = 16
+    if size < 8:
+        print("Error, invalid size {} in {} at {}".format(size, name, position))
+        return None
+    if (position + size) > end:
+        print("Error: Leaf box size exceeds bounds.")
+        return None
+    new_box = Box()
+    new_box.name = name
+    new_box.position = position
+    new_box.header_size = header_size
+    new_box.content_size = size - header_size
+    new_box.contents = None
+    return new_box
+class Box(object):
+    """MPEG4 box contents and behaviour true for all boxes."""
+    def __init__(self):
+        self.name = ""
+        self.position = 0
+        self.header_size = 0
+        self.content_size = 0
+        self.contents = None
+    def content_start(self):
+        return self.position + self.header_size
+    def save(self, in_fh, out_fh, delta):
+        """Save box contents prioritizing set contents.
+        Args:
+          in_fh: file handle, source to read box contents from.
+          out_fh: file handle, destination for written box contents.
+          delta: int, index update amount.
+        """
+        if self.header_size == 16:
+            out_fh.write(struct.pack(">I", 1))
+            out_fh.write(self.name)
+            out_fh.write(struct.pack(">Q", self.size()))
+        elif self.header_size == 8:
+            out_fh.write(struct.pack(">I", self.size()))
+            out_fh.write(self.name)
+        if self.content_start():
+            in_fh.seek(self.content_start())
+        if self.name == constants.TAG_STCO:
+            stco_copy(in_fh, out_fh, self, delta)
+        elif self.name == constants.TAG_CO64:
+            co64_copy(in_fh, out_fh, self, delta)
+        elif self.contents:
+            out_fh.write(self.contents)
+        else:
+            tag_copy(in_fh, out_fh, self.content_size)
+    def set(self, new_contents):
+        """Sets / overwrites the box contents."""
+        self.contents = new_contents
+        self.content_size = len(contents)
+    def size(self):
+        """Total size of a box.
+        Returns:
+          Int, total size in bytes of the box.
+        """
+        return self.header_size + self.content_size
+    def print_structure(self, indent=""):
+        """Prints the box structure."""
+        size1 = self.header_size
+        size2 = self.content_size
+        print("{0} {1} [{2}, {3}]".format(indent, self.name, size1, size2))
+def tag_copy(in_fh, out_fh, size):
+    """Copies a block of data from in_fh to out_fh.
+    Args:
+      in_fh: file handle, source of uncached file contents.
+      out_fh: file handle, destination for saved file.
+      size: int, amount of data to copy.
+    """
+    # On 32-bit systems reading / writing is limited to 2GB chunks.
+    # To prevent overflow, read/write 64 MB chunks.
+    block_size = 64 * 1024 * 1024
+    while (size > block_size):
+        contents = in_fh.read(block_size)
+        out_fh.write(contents)
+        size = size - block_size
+    contents = in_fh.read(size)
+    out_fh.write(contents)
+def index_copy(in_fh, out_fh, box, mode, mode_length, delta=0):
+    """Update and copy index table for stco/co64 files.
+    Args:
+      in_fh: file handle, source to read index table from.
+      out_fh: file handle, destination for index file.
+      box: box, stco/co64 box to copy.
+      mode: string, bit packing mode for index entries.
+      mode_length: int, number of bytes for index entires.
+      delta: int, offset change for index entries.
+    """
+    fh = in_fh
+    if not box.contents:
+        fh.seek(box.content_start())
+    else:
+        fh = io.BytesIO(box.contents)
+    header = struct.unpack(">I", fh.read(4))[0]
+    values = struct.unpack(">I", fh.read(4))[0]
+    new_contents = []
+    new_contents.append(struct.pack(">I", header))
+    new_contents.append(struct.pack(">I", values))
+    for i in range(values):
+        content = fh.read(mode_length)
+        content = struct.unpack(mode, content)[0] + delta
+        new_contents.append(struct.pack(mode, content))
+    out_fh.write(b"".join(new_contents))
+def stco_copy(in_fh, out_fh, box, delta=0):
+    """Copy for stco box.
+    Args:
+      in_fh: file handle, source to read index table from.
+      out_fh: file handle, destination for index file.
+      box: box, stco box to copy.
+      delta: int, offset change for index entries.
+    """
+    index_copy(in_fh, out_fh, box, ">I", 4, delta)
+def co64_copy(in_fh, out_fh, box, delta=0):
+    """Copy for co64 box.
+    Args:
+      in_fh: file handle, source to read index table from.
+      out_fh: file handle, destination for index file.
+      box: box, co64 box to copy.
+      delta: int, offset change for index entries.
+    """
+    index_copy(in_fh, out_fh, box, ">Q", 8, delta)

spatialmedia/mpeg/constants.py ADDED Viewed

	@@ -0,0 +1,86 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MPEG-4 constants."""
+TRAK_TYPE_VIDE = b"vide"
+# Leaf types.
+TAG_STCO = b"stco"
+TAG_CO64 = b"co64"
+TAG_FREE = b"free"
+TAG_MDAT = b"mdat"
+TAG_XML = b"xml "
+TAG_HDLR = b"hdlr"
+TAG_FTYP = b"ftyp"
+TAG_ESDS = b"esds"
+TAG_SOUN = b"soun"
+TAG_SA3D = b"SA3D"
+# Container types.
+TAG_MOOV = b"moov"
+TAG_UDTA = b"udta"
+TAG_META = b"meta"
+TAG_TRAK = b"trak"
+TAG_MDIA = b"mdia"
+TAG_MINF = b"minf"
+TAG_STBL = b"stbl"
+TAG_STSD = b"stsd"
+TAG_UUID = b"uuid"
+TAG_WAVE = b"wave"
+# Sound sample descriptions.
+TAG_NONE = b"NONE"
+TAG_RAW_ = b"raw "
+TAG_TWOS = b"twos"
+TAG_SOWT = b"sowt"
+TAG_FL32 = b"fl32"
+TAG_FL64 = b"fl64"
+TAG_IN24 = b"in24"
+TAG_IN32 = b"in32"
+TAG_ULAW = b"ulaw"
+TAG_ALAW = b"alaw"
+TAG_LPCM = b"lpcm"
+TAG_MP4A = b"mp4a"
+TAG_OPUS = b"Opus"
+SOUND_SAMPLE_DESCRIPTIONS = frozenset([
+    TAG_NONE,
+    TAG_RAW_,
+    TAG_TWOS,
+    TAG_SOWT,
+    TAG_FL32,
+    TAG_FL64,
+    TAG_IN24,
+    TAG_IN32,
+    TAG_ULAW,
+    TAG_ALAW,
+    TAG_LPCM,
+    TAG_MP4A,
+    TAG_OPUS,
+    ])
+CONTAINERS_LIST = frozenset([
+    TAG_MDIA,
+    TAG_MINF,
+    TAG_MOOV,
+    TAG_STBL,
+    TAG_STSD,
+    TAG_TRAK,
+    TAG_UDTA,
+    TAG_WAVE,
+    ]).union(SOUND_SAMPLE_DESCRIPTIONS)

spatialmedia/mpeg/container.py ADDED Viewed

	@@ -0,0 +1,210 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MPEG processing classes.
+Functions for loading MPEG files and manipulating boxes.
+"""
+import struct
+from spatialmedia.mpeg import box
+from spatialmedia.mpeg import constants
+from spatialmedia.mpeg import sa3d
+def load(fh, position, end):
+    if position is None:
+        position = fh.tell()
+    fh.seek(position)
+    header_size = 8
+    size = struct.unpack(">I", fh.read(4))[0]
+    name = fh.read(4)
+    is_box = name not in constants.CONTAINERS_LIST
+    # Handle the mp4a decompressor setting (wave -> mp4a).
+    if name == constants.TAG_MP4A and size == 12:
+        is_box = True
+    if is_box:
+        if name == constants.TAG_SA3D:
+            return sa3d.load(fh, position, end)
+        return box.load(fh, position, end)
+    if size == 1:
+        size = struct.unpack(">Q", fh.read(8))[0]
+        header_size = 16
+    if size < 8:
+        print("Error, invalid size", size, "in", name, "at", position)
+        return None
+    if (position + size) > end:
+        print("Error: Container box size exceeds bounds.")
+        return None
+    padding = 0
+    if name == constants.TAG_STSD:
+        padding = 8
+    if name in constants.SOUND_SAMPLE_DESCRIPTIONS:
+        current_pos = fh.tell()
+        fh.seek(current_pos + 8)
+        sample_description_version = struct.unpack(">h", fh.read(2))[0]
+        fh.seek(current_pos)
+        if sample_description_version == 0:
+            padding = 28
+        elif sample_description_version == 1:
+            padding = 28 + 16
+        elif sample_description_version == 2:
+            padding = 64
+        else:
+            print("Unsupported sample description version:",
+                  sample_description_version)
+    new_box = Container()
+    new_box.name = name
+    new_box.position = position
+    new_box.header_size = header_size
+    new_box.content_size = size - header_size
+    new_box.padding = padding
+    new_box.contents = load_multiple(
+        fh, position + header_size + padding, position + size)
+    if new_box.contents is None:
+        return None
+    return new_box
+def load_multiple(fh, position=None, end=None):
+    loaded = list()
+    while (position + 4 < end):
+        new_box = load(fh, position, end)
+        if new_box is None:
+            print("Error, failed to load box.")
+            return None
+        loaded.append(new_box)
+        position = new_box.position + new_box.size()
+    return loaded
+class Container(box.Box):
+    """MPEG4 container box contents / behaviour."""
+    def __init__(self, padding=0):
+        self.name = ""
+        self.position = 0
+        self.header_size = 0
+        self.content_size = 0
+        self.contents = list()
+        self.padding = padding
+    def resize(self):
+        """Recomputes the box size and recurses on contents."""
+        self.content_size = self.padding
+        for element in self.contents:
+            if isinstance(element, Container):
+                element.resize()
+            self.content_size += element.size()
+    def print_structure(self, indent=""):
+        """Prints the box structure and recurses on contents."""
+        size1 = self.header_size
+        size2 = self.content_size
+        print("{0} {1} [{2}, {3}]".format(indent, self.name, size1, size2))
+        size = len(self.contents)
+        for i in range(size):
+            next_indent = indent
+            next_indent = next_indent.replace("├", "│")
+            next_indent = next_indent.replace("└", " ")
+            next_indent = next_indent.replace("─", " ")
+            if i == (size - 1):
+                next_indent = next_indent + " └──"
+            else:
+                next_indent = next_indent + " ├──"
+            element = self.contents[i]
+            element.print_structure(next_indent)
+    def remove(self, tag):
+        """Removes a tag recursively from all containers."""
+        new_contents = []
+        self.content_size = 0
+        for element in self.contents:
+            if element.name != tag:
+                new_contents.append(element)
+                if isinstance(element, Container):
+                    element.remove(tag)
+                self.content_size += element.size()
+        self.contents = new_contents
+    def add(self, element):
+        """Adds an element, merging with containers of the same type.
+        Returns:
+          Int, increased size of container.
+        """
+        for content in self.contents:
+            if content.name == element.name:
+                if isinstance(content, container_leaf):
+                    return content.merge(element)
+                print("Error, cannot merge leafs.")
+                return False
+        self.contents.append(element)
+        return True
+    def merge(self, element):
+        """Merges structure with container.
+        Returns:
+          Int, increased size of container.
+        """
+        assert(self.name == element.name)
+        assert(isinstance(element, container_box))
+        for sub_element in element.contents:
+            if not self.add(sub_element):
+                return False
+        return True
+    def save(self, in_fh, out_fh, delta):
+        """Saves box to out_fh reading uncached content from in_fh.
+        Args:
+          in_fh: file handle, source of uncached file contents.
+          out_fh: file_hande, destination for saved file.
+          delta: int, file change size for updating stco and co64 files.
+        """
+        if self.header_size == 16:
+            out_fh.write(struct.pack(">I", 1))
+            out_fh.write(self.name)
+            out_fh.write(struct.pack(">Q", self.size()))
+        elif self.header_size == 8:
+            out_fh.write(struct.pack(">I", self.size()))
+            out_fh.write(self.name)
+        if self.padding > 0:
+            in_fh.seek(self.content_start())
+            box.tag_copy(in_fh, out_fh, self.padding)
+        for element in self.contents:
+            element.save(in_fh, out_fh, delta)

spatialmedia/mpeg/mpeg4_container.py ADDED Viewed

	@@ -0,0 +1,133 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MPEG4 processing classes.
+Functions for loading MP4/MOV files and manipulating boxes.
+"""
+from spatialmedia.mpeg import box
+from spatialmedia.mpeg import constants
+from spatialmedia.mpeg import container
+def load(fh):
+    """Load the mpeg4 file structure of a file.
+    Args:
+      fh: file handle, input file handle.
+      position: int, current file position.
+      size: int, maximum size. This is used to ensure correct box sizes.
+    return:
+      mpeg4, the loaded mpeg4 structure.
+    """
+    fh.seek(0, 2)
+    size = fh.tell()
+    contents = container.load_multiple(fh, 0, size)
+    if not contents:
+        print("Error, failed to load .mp4 file.")
+        return None
+    elif len(contents) == 0:
+        print("Error, no boxes found.")
+        return None
+    loaded_mpeg4 = Mpeg4Container()
+    loaded_mpeg4.contents = contents
+    for element in loaded_mpeg4.contents:
+        if (element.name == constants.TAG_MOOV):
+            loaded_mpeg4.moov_box = element
+        if (element.name == constants.TAG_FREE):
+            loaded_mpeg4.free_box = element
+        if (element.name == constants.TAG_MDAT
+                and not loaded_mpeg4.first_mdat_box):
+            loaded_mpeg4.first_mdat_box = element
+        if (element.name == constants.TAG_FTYP):
+            loaded_mpeg4.ftyp_box = element
+    if not loaded_mpeg4.moov_box:
+        print("Error, file does not contain moov box.")
+        return None
+    if not loaded_mpeg4.first_mdat_box:
+        print("Error, file does not contain mdat box.")
+        return None
+    loaded_mpeg4.first_mdat_position = \
+        loaded_mpeg4.first_mdat_box.position
+    loaded_mpeg4.first_mdat_position += \
+        loaded_mpeg4.first_mdat_box.header_size
+    loaded_mpeg4.content_size = 0
+    for element in loaded_mpeg4.contents:
+        loaded_mpeg4.content_size += element.size()
+    return loaded_mpeg4
+class Mpeg4Container(container.Container):
+    """Specialized behaviour for the root mpeg4 container."""
+    def __init__(self):
+        self.contents = list()
+        self.content_size = 0
+        self.header_size = 0
+        self.moov_box = None
+        self.free_box = None
+        self.first_mdat_box = None
+        self.ftyp_box = None
+        self.first_mdat_position = None
+        self.padding = 0
+    def merge(self, element):
+        """Mpeg4 containers do not support merging."""
+        print("Cannot merge mpeg4 files")
+        exit(0)
+    def print_structure(self):
+        """Print mpeg4 file structure recursively."""
+        print("mpeg4 [{}]".format(self.content_size))
+        size = len(self.contents)
+        for i in range(size):
+            next_indent = " ├──"
+            if i == (size - 1):
+                next_indent = " └──"
+            self.contents[i].print_structure(next_indent)
+    def save(self, in_fh, out_fh):
+        """Save mpeg4 filecontent to file.
+        Args:
+          in_fh: file handle, source file handle for uncached contents.
+          out_fh: file handle, destination file hand for saved file.
+        """
+        self.resize()
+        new_position = 0
+        for element in self.contents:
+            if element.name == constants.TAG_MDAT:
+                new_position += element.header_size
+                break
+            new_position += element.size()
+        delta = new_position - self.first_mdat_position
+        for element in self.contents:
+            element.save(in_fh, out_fh, delta)

spatialmedia/mpeg/sa3d.py ADDED Viewed

	@@ -0,0 +1,176 @@

+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MPEG SA3D box processing classes.
+Enables the injection of an SA3D MPEG-4. The SA3D box specification
+conforms to that outlined in docs/spatial-audio-rfc.md
+"""
+import struct
+from spatialmedia.mpeg import box
+from spatialmedia.mpeg import constants
+def load(fh, position=None, end=None):
+    """ Loads the SA3D box located at position in an mp4 file.
+    Args:
+      fh: file handle, input file handle.
+      position: int or None, current file position.
+    Returns:
+      new_box: box, SA3D box loaded from the file location or None.
+    """
+    if position is None:
+        position = fh.tell()
+    fh.seek(position)
+    new_box = SA3DBox()
+    new_box.position = position
+    size = struct.unpack(">I", fh.read(4))[0]
+    name = fh.read(4)
+    if (name != constants.TAG_SA3D):
+        print("Error: box is not an SA3D box.")
+        return None
+    if (position + size > end):
+        print("Error: SA3D box size exceeds bounds.")
+        return None
+    new_box.content_size = size - new_box.header_size
+    new_box.version = struct.unpack(">B", fh.read(1))[0]
+    new_box.ambisonic_type = struct.unpack(">B", fh.read(1))[0]
+    new_box.head_locked_stereo = (new_box.ambisonic_type & int('10000000', 2) != 0)
+    new_box.ambisonic_type = new_box.ambisonic_type & int('01111111', 2)
+    new_box.ambisonic_order = struct.unpack(">I", fh.read(4))[0]
+    new_box.ambisonic_channel_ordering = struct.unpack(">B", fh.read(1))[0]
+    new_box.ambisonic_normalization = struct.unpack(">B", fh.read(1))[0]
+    new_box.num_channels = struct.unpack(">I", fh.read(4))[0]
+    for i in range(0, new_box.num_channels):
+        new_box.channel_map.append(
+            struct.unpack(">I", fh.read(4))[0])
+    return new_box
+class SA3DBox(box.Box):
+    ambisonic_types = {'periphonic': 0}
+    ambisonic_orderings = {'ACN': 0}
+    ambisonic_normalizations = {'SN3D': 0}
+    def __init__(self):
+        box.Box.__init__(self)
+        self.name = constants.TAG_SA3D
+        self.header_size = 8
+        self.version = 0
+        self.ambisonic_type = 0
+        self.head_locked_stereo = False
+        self.ambisonic_order = 0
+        self.ambisonic_channel_ordering = 0
+        self.ambisonic_normalization = 0
+        self.num_channels = 0
+        self.channel_map = list()
+    @staticmethod
+    def create(num_channels, audio_metadata):
+        new_box = SA3DBox()
+        new_box.header_size = 8
+        new_box.name = constants.TAG_SA3D
+        new_box.version = 0                     # uint8
+        new_box.content_size += 1               # uint8
+        new_box.ambisonic_type = SA3DBox.ambisonic_types[
+            audio_metadata["ambisonic_type"]]
+        new_box.head_locked_stereo = audio_metadata["head_locked_stereo"]
+        new_box.content_size += 1               # uint8
+        new_box.ambisonic_order = audio_metadata["ambisonic_order"]
+        new_box.content_size += 4               # uint32
+        new_box.ambisonic_channel_ordering = SA3DBox.ambisonic_orderings[
+            audio_metadata["ambisonic_channel_ordering"]]
+        new_box.content_size += 1               # uint8
+        new_box.ambisonic_normalization = SA3DBox.ambisonic_normalizations[
+            audio_metadata["ambisonic_normalization"]]
+        new_box.content_size += 1               # uint8
+        new_box.num_channels = num_channels
+        new_box.content_size += 4               # uint32
+        channel_map = audio_metadata["channel_map"]
+        for channel_element in channel_map:
+            new_box.channel_map.append(channel_element)
+            new_box.content_size += 4  # uint32
+        return new_box
+    def ambisonic_type_name(self):
+        return  next((key for key,value in SA3DBox.ambisonic_types.items()
+                 if value==self.ambisonic_type))
+    def ambisonic_channel_ordering_name(self):
+        return next((key for key,value in SA3DBox.ambisonic_orderings.items()
+                if value==self.ambisonic_channel_ordering))
+    def ambisonic_normalization_name(self):
+        return next((key for key,value in SA3DBox.ambisonic_normalizations.items()
+                if value==self.ambisonic_normalization))
+    def print_box(self, console):
+        """ Prints the contents of this spatial audio (SA3D) box to the
+            console.
+        """
+        ambisonic_type = self.ambisonic_type_name()
+        channel_ordering = self.ambisonic_channel_ordering_name()
+        ambisonic_normalization = self.ambisonic_normalization_name()
+        console("\t\tAmbisonic Type: %s" % ambisonic_type)
+        console("\t\tContains Head-Locked Stereo: %r" % self.head_locked_stereo)
+        console("\t\tAmbisonic Order: %d" % self.ambisonic_order)
+        console("\t\tAmbisonic Channel Ordering: %s" % channel_ordering)
+        console("\t\tAmbisonic Normalization: %s" % ambisonic_normalization)
+        console("\t\tNumber of Channels: %d" % self.num_channels)
+        console("\t\tChannel Map: %s" % str(self.channel_map))
+    def get_metadata_string(self):
+        """ Outputs a concise single line audio metadata string. """
+        metadata = "%s, %s, %s, Order %d, %d Channel(s), Channel Map: %s" \
+            % (self.ambisonic_normalization_name(),\
+               self.ambisonic_channel_ordering_name(),\
+               self.ambisonic_type_name(),\
+               self.ambisonic_order,\
+               self.num_channels,\
+               str(self.channel_map))
+        return metadata
+    def save(self, in_fh, out_fh, delta):
+        if (self.header_size == 16):
+            out_fh.write(struct.pack(">I", 1))
+            out_fh.write(struct.pack(">Q", self.size()))
+            out_fh.write(self.name)
+        elif(self.header_size == 8):
+            out_fh.write(struct.pack(">I", self.size()))
+            out_fh.write(self.name)
+        ambisonic_type = (
+            self.ambisonic_type | int('10000000', 2) if
+            self.head_locked_stereo else self.ambisonic_type & int('01111111', 2))
+        out_fh.write(struct.pack(">B", self.version))
+        out_fh.write(struct.pack(">B", ambisonic_type))
+        out_fh.write(struct.pack(">I", self.ambisonic_order))
+        out_fh.write(struct.pack(">B", self.ambisonic_channel_ordering))
+        out_fh.write(struct.pack(">B", self.ambisonic_normalization))
+        out_fh.write(struct.pack(">I", self.num_channels))
+        for i in self.channel_map:
+            if (i != None):
+                out_fh.write(struct.pack(">I", int(i)))