File size: 2,399 Bytes
f7aaeb6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
"""
formats output text file from llampia
into markdown tables
"""
import os
def format_edu(turn_str):
    """Format one dialogue line as a Markdown fragment.

    The line is expected to hold a number, then a speaker tag enclosed in
    angle brackets, then the utterance text, e.g. ``3 <Arch> hello``.

    Args:
        turn_str: A single line of the form described above.

    Returns:
        The string ``' **<number> <SPEAKER>** <text>'``. SPEAKER is ``ARCH``
        when the part before the first ``>`` contains ``Arch`` and ``BUIL``
        otherwise.

    Raises:
        IndexError: If ``turn_str`` contains no ``>``.
    """
    # Split on the FIRST '>' only: a '>' inside the utterance itself
    # (e.g. "->") must not truncate the text.
    parts = turn_str.split('>', 1)
    header = parts[0]
    text = parts[1].strip()
    # Everything before the first '<' is the turn/EDU number.
    no = turn_str.split('<', 1)[0].strip()
    speaker = 'ARCH' if 'Arch' in header else 'BUIL'
    return ' **' + no + ' ' + speaker + '** ' + text
# Input (parser output text file) and output (Markdown file) locations.
current_folder = os.getcwd()
output_path = '/path/to/test-output-file.txt'
table_path = f'{current_folder}/test_predictions.md'

with open(output_path, 'r') as txt:
    text = txt.read().split('\n')

# Group the lines into "pairs": each pair is the list of lines running from a
# 'New Turn' line up to and including the next ' ### DS:' line.
pairs = []
pair = []
collecting = False  # True while we are between a 'New Turn' line and its ' ### DS:' line
for line in text:
    if line.startswith('New Turn'):
        collecting = True
    elif line.startswith(' ### DS:'):
        # The structure line closes the current pair.
        pairs.append(pair + [line])
        pair = []
        collecting = False
        continue
    elif not collecting:
        # Lines outside of a turn are ignored.
        continue
    pair.append(line)
# Literal prefixes used by the input file.
TURN_PREFIX = 'New Turn: '
DS_PREFIX = ' ### DS:'


def _structure_text(line):
    """Return the structure part of a ' ###' line without its marker."""
    # str.lstrip(' ### DS:') would strip a *set of characters*, which also
    # eats a structure that happens to begin with 'D', 'S', ':' or '#'.
    # Remove the literal prefix instead.
    if line.startswith(DS_PREFIX):
        return line[len(DS_PREFIX):].lstrip(' ')
    return line.lstrip(' #')


# Build one table (a list of [dialogue, structure] rows) per dialogue.
# A pair whose first line starts with 'New Turn: 1 <' opens a new dialogue.
tables = []
rows = []
dial_str = ''
struct_str = ''
for pair in pairs:
    first = pair[0]
    if not first.startswith(TURN_PREFIX):
        continue
    if first.startswith(TURN_PREFIX + '1 <'):
        # A new dialogue begins: store the rows of the previous one.
        if rows:
            tables.append(rows)
        rows = []
        dial_str = '**0 BUIL** Mission has started.'
    else:
        dial_str = ''
    dial_str += format_edu(first[len(TURN_PREFIX):])
    for p in pair[1:]:
        if p.startswith(' ###'):
            struct_str = _structure_text(p)
            rows.append([dial_str, struct_str])
        elif p.strip():
            # Blank lines carry no dialogue unit and are skipped.
            dial_str += format_edu(p)

# The rows of the final dialogue are not followed by another 'New Turn: 1 <'
# pair, so they must be stored here or the last table would be lost.
if rows:
    tables.append(rows)
# Render every table as Markdown lines; a ' ' separator line follows each
# table so that consecutive tables do not run together.
all_md_tables = []
for table in tables:
    all_md_tables.append('| Dialogue | Structure |')
    all_md_tables.append('| ----- | ----- |')
    all_md_tables.extend(
        '| ' + dialogue + ' | ' + structure + ' |'
        for dialogue, structure in table
    )
    all_md_tables.append(' ')

md_tables_string = '\n'.join(all_md_tables)

# Use a context manager so the output file is flushed and closed
# deterministically. The file is created (or truncated) even when there is
# nothing to write.
with open(table_path, 'w') as f:
    if md_tables_string:
        print(md_tables_string)
        print(md_tables_string, file=f)

print("markdown printed")
|