File size: 4,153 Bytes
d0681c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
{%+ macro render_line(line) +%}
{#
  render_line(line): emit one PAGE <TextLine> element for a line record.

  Fields read from `line`:
    id           - written to the TextLine id attribute.
    tags         - optional mapping; when it has a "type" key the value is
                   written into the custom="structure {type:...;}" attribute.
    boundary     - optional list of points, written as <Coords>.
    baseline     - optional list of points, written as <Baseline>.
    text         - when this is a string, a single line-level <TextEquiv>
                   is written and `recognition` is not consulted.
    recognition  - otherwise iterated as segments; each becomes a <Word>
                   whose own `recognition` entries become <Glyph> elements.
    confidences  - list of numbers; its mean (rounded to 4 places) becomes
                   the line-level conf attribute.

  Every mean is guarded by a length check so an empty confidence list
  omits the conf attribute instead of dividing by zero.
  Values placed in attributes are passed through |e so that markup
  characters in ids or tag values cannot break the document.
#}
            <TextLine id="{{ line.id|e }}" {% if line.tags and "type" in line.tags %}custom="structure {type:{{ line.tags["type"]|e }};}"{% endif %}>
                {% if line.boundary %}
                <Coords points="{% for point in line.boundary %}{{ point|join(',') }}{% if not loop.last %} {% endif %}{% endfor %}"/>
                {% endif %}
                {% if line.baseline %}
                <Baseline points="{% for point in line.baseline %}{{ point|join(',') }}{% if not loop.last %} {% endif %}{% endfor %}"/>
                {% endif %}
                {% if line.text is string %}
                    <TextEquiv{% if line.confidences|length %} conf="{{ (line.confidences|sum / line.confidences|length)|round(4) }}"{% endif %}><Unicode>{{ line.text|e }}</Unicode></TextEquiv>
                {% else %}
                {% for segment in line.recognition %}
                <Word id="segment_{{ segment.index }}">
                    {# Prefer the segment polygon; otherwise build a rectangle from bbox[0..3]. #}
                    {% if segment.boundary %}
                    <Coords points="{% for point in segment.boundary %}{{ point|join(',') }}{% if not loop.last %} {% endif %}{% endfor %}"/>
                    {% else %}
                    <Coords points="{{ segment.bbox[0] }},{{ segment.bbox[1] }} {{ segment.bbox[0] }},{{ segment.bbox[3] }} {{ segment.bbox[2] }},{{ segment.bbox[3] }} {{ segment.bbox[2] }},{{ segment.bbox[1] }}"/>
                    {% endif %}
                {% for char in segment.recognition %}
                    <Glyph id="char_{{ char.index }}">
                        <Coords points="{% for point in char.boundary %}{{ point|join(',') }}{% if not loop.last %} {% endif %}{% endfor %}"/>
                        <TextEquiv conf="{{ char.confidence|round(4) }}"><Unicode>{{ char.text|e }}</Unicode></TextEquiv>
                    </Glyph>
                {% endfor %}
                    {# conf is only written when there is something to average. #}
                    <TextEquiv{% if segment.confidences|length %} conf="{{ (segment.confidences|sum / segment.confidences|length)|round(4) }}"{% endif %}><Unicode>{{ segment.text|e }}</Unicode></TextEquiv>
                </Word>
                {% endfor %}
                {%+ if line.confidences|length %}<TextEquiv conf="{{ (line.confidences|sum / line.confidences|length)|round(4) }}"><Unicode>{% for segment in line.recognition %}{{ segment.text|e }}{% endfor %}</Unicode></TextEquiv>{% endif +%}
                {% endif %}
            </TextLine>
{%+ endmacro %}
<?xml version="1.0" encoding="UTF-8"?>
{#
  Page-level PAGE XML document.

  Context read here:
    metadata.version - appended to the <Creator> string.
    page.date        - written to both <Created> and <LastChange>.
    page.name        - image file name (escaped, it lands in an attribute).
    page.size        - indexed as [0] for width and [1] for height.
    page.base_dir    - optional; written as readingDirection when set.
    page.entities    - iterated in order; each has a `type` that is either
                       "region" or treated as a line.

  Region entities become their own <TextRegion> holding entity.lines.
  A run of consecutive line entities is wrapped in one synthetic
  <TextRegion> covering the whole page: it is opened on the first line of
  the run and closed either when the next region starts or at the end of
  the loop.
#}
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd">
    <Metadata>
        <Creator>kraken {{ metadata.version }}</Creator>
        <Created>{{ page.date }}</Created>
        <LastChange>{{ page.date }}</LastChange>
    </Metadata>
    <Page imageFilename="{{ page.name|e }}" imageWidth="{{ page.size[0] }}" imageHeight="{{ page.size[1] }}" {% if page.base_dir %}readingDirection="{{ page.base_dir }}"{% endif %}>
    {% for entity in page.entities %}
        {% if entity.type == "region" %}
        {# A preceding run of loose lines left its synthetic region open; close it first. #}
        {% if loop.previtem and loop.previtem.type == 'line' %}
        </TextRegion>
        {% endif %}
        <TextRegion id="{{ entity.id|e }}" {% if entity.tags and "type" in entity.tags %}custom="structure {type:{{ entity.tags["type"]|e }};}"{% endif %}>
            {% if entity.boundary %}<Coords points="{% for point in entity.boundary %}{{ point|join(',') }}{% if not loop.last %} {% endif %}{% endfor %}"/>{% endif %}
            {%- for line in entity.lines -%}
            {{ render_line(line) }}
            {%- endfor %}
        </TextRegion>
        {% else %}
        {# First line of a run: open a synthetic region spanning the full page. #}
        {% if not loop.previtem or loop.previtem.type != 'line' %}
        <TextRegion id="textblock_{{ loop.index }}">
            <Coords points="0,0 0,{{ page.size[1] }} {{ page.size[0] }},{{ page.size[1] }} {{ page.size[0] }},0"/>
        {% endif %}
            {{ render_line(entity) }}
        {# No later region will close the synthetic region, so close it here. #}
        {% if loop.last %}
        </TextRegion>
        {% endif %}
        {% endif %}
    {% endfor %}
    </Page>
</PcGts>