|
from preprocessed_dialogs import dialog_data |
|
|
|
|
|
# Summary statistics over the preprocessed dialog corpus.
#
# ``dialog_data`` is iterated as a mapping whose values are sequences of
# two-element items (``item[0]`` / ``item[1]``).
# NOTE(review): presumably each item is a (prompt, reply) utterance pair --
# confirm against ``preprocessed_dialogs``.


def summarize_dialogs(dialogs):
    """Collect pair-count and word-count statistics for a dialog mapping.

    Args:
        dialogs: Mapping whose values are sequences of two-element items.
            Falsy elements (``None`` or ``""``) are skipped when counting
            words.

    Returns:
        A dict with the following keys:

        * ``pair_counts`` -- number of items in each dialog, in iteration
          order.
        * ``word_counts`` -- ``len(text.split(' ')) + 1`` for every
          non-empty element (all first elements of a dialog, then all
          second elements).
        * ``top_count`` -- largest pair count, ``0`` when there are no
          dialogs.
        * ``second_count`` -- largest pair count strictly below
          ``top_count``, ``0`` when there is none.
        * ``average_pairs`` -- mean of ``pair_counts``, ``0.0`` when there
          are no dialogs.
        * ``max_words`` -- largest entry of ``word_counts``, ``0`` when it
          is empty.
    """
    pair_counts = []
    word_counts = []
    top_count = 0
    second_count = 0

    for pairs in dialogs.values():
        n_pairs = len(pairs)
        pair_counts.append(n_pairs)

        # First elements are measured before second elements so the order
        # of ``word_counts`` matches the original script.
        for column in (0, 1):
            for text in [pair[column] for pair in pairs]:
                if text:
                    # NOTE(review): the ``+ 1`` is kept from the original;
                    # presumably it reserves a slot for an end-of-sequence
                    # token -- confirm before relying on it.
                    word_counts.append(len(text.split(' ')) + 1)

        if n_pairs > top_count:
            # The previous maximum becomes the runner-up.  The original
            # skipped this step, so ascending lengths (e.g. 5 then 10)
            # left the runner-up stuck at 0.
            second_count = top_count
            top_count = n_pairs
        elif second_count < n_pairs < top_count:
            second_count = n_pairs

    # Guard the empty cases instead of raising ZeroDivisionError/ValueError.
    average_pairs = sum(pair_counts) / len(pair_counts) if pair_counts else 0.0

    return {
        "pair_counts": pair_counts,
        "word_counts": word_counts,
        "top_count": top_count,
        "second_count": second_count,
        "average_pairs": average_pairs,
        "max_words": max(word_counts, default=0),
    }


_stats = summarize_dialogs(dialog_data)

# Module-level result names are kept from the original script so that any
# code importing them keeps working.
list_average = _stats["pair_counts"]
average_list = _stats["word_counts"]
top_count = _stats["top_count"]
second_count = _stats["second_count"]
# Every item contributes exactly one first element, so the longest list of
# first elements is the same value as the largest pair count.
top_list = top_count
average_total = _stats["average_pairs"]

print(f"Average Length of Pairs: {average_total}")
print(f"Conversation 0: {top_count} \nEmbedding Dimension Min: {second_count}")
print(f"Max Sequence Length: {top_list}")
print(f"Max Word in Sentence Count: {max(average_list, default=0)}")
|
|