Generative-Sequence-Chatbot / processed_dlogs_length.py
Hyacinthax's picture
Upload processed_dlogs_length.py
4ec6a0f verified
from preprocessed_dialogs import dialog_data
# Summary statistics over ``dialog_data``.
#
# ``dialog_data`` is iterated as a mapping whose values are sequences of
# pairs; element 0 and element 1 of every pair are read as sentences.
# (presumably conversation id -> list of (prompt, reply) pairs -- TODO confirm
# against ``preprocessed_dialogs``.)

top_count = 0       # largest number of pairs found in a single conversation
second_count = 0    # second-largest distinct number of pairs
top_list = 0        # longest first-element list (see note in the loop)
average_list = []   # one entry per non-empty sentence: its word count + 1
list_average = []   # one entry per conversation: its number of pairs

for values in dialog_data.values():
    list_length = len(values)  # Length of Pairs
    list_average.append(list_length)

    list_1 = [item[0] for item in values]
    list_2 = [item[1] for item in values]

    # First elements are processed before second elements so the order of
    # ``average_list`` stays the same as before.
    for sentences in (list_1, list_2):
        for sentence in sentences:
            # Skip empty/missing entries and anything that is not text; the
            # type check is applied to the sentence itself, since the
            # containers built above are always lists.
            if sentence and isinstance(sentence, str):
                # NOTE(review): the "+ 1" makes every count one larger than
                # the number of space-separated pieces -- presumably room for
                # an extra marker token; confirm before relying on it.
                average_list.append(len(sentence.split(' ')) + 1)

    if list_length > top_count:
        # A new maximum demotes the previous maximum to runner-up; without
        # this the runner-up depended on the order conversations were seen.
        second_count = top_count
        top_count = list_length
    elif second_count < list_length < top_count:
        second_count = list_length

    # ``list_1`` holds one entry per pair, so this tracks the same value as
    # ``top_count``; it is kept as its own name because it is reported under
    # a separate label below.
    top_list = max(top_list, len(list_1))

# Mean number of pairs per conversation; 0.0 when there are no conversations
# (avoids a ZeroDivisionError on empty data).
average_total = sum(list_average) / len(list_average) if list_average else 0.0

# NOTE(review): "Conversation 0" reports the largest pair count and
# "Embedding Dimension Min" reports the second-largest one; the labels look
# like they refer to downstream model settings -- confirm their meaning.
print(f"Average Length of Pairs: {average_total}")
print(f"Conversation 0: {top_count} \nEmbedding Dimension Min: {second_count}")
print(f"Max Sequence Length: {top_list}")
# ``default=0`` keeps the report working when no sentence was counted.
print(f"Max Word in Sentence Count: {max(average_list, default=0)}")