File size: 1,675 Bytes
4ec6a0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from preprocessed_dialogs import dialog_data


# Summary statistics over ``dialog_data``: number of pairs per conversation
# and number of words per utterance.
# NOTE(review): assumes ``dialog_data`` maps a conversation id to a sequence
# of two-element pairs of utterance strings -- confirm against
# preprocessed_dialogs.

top_count = 0        # largest number of pairs found in a single conversation
second_count = 0     # second-largest distinct number of pairs
top_list = 0         # longest first-column list; always equals top_count
average_list = []    # one word count per non-empty utterance
list_average = []    # number of pairs in each conversation

for values in dialog_data.values():
    list_length = len(values)
    list_average.append(list_length)

    # Walk the first element of every pair, then the second element.
    for column in (0, 1):
        for pair in values:
            text = pair[column]
            # Skip empty entries, and skip non-strings instead of crashing
            # on ``.split`` (the old guard tested the list, not the item).
            if isinstance(text, str) and text:
                # NOTE(review): the ``+ 1`` is kept from the original code;
                # presumably it reserves room for one extra token -- confirm.
                average_list.append(len(text.split(' ')) + 1)

    if list_length > top_count:
        # The previous maximum becomes the runner-up; without this hand-over
        # the runner-up was lost whenever a new maximum arrived.
        second_count = top_count
        top_count = list_length
    elif second_count < list_length < top_count:
        second_count = list_length

    top_list = max(top_list, list_length)

# Averages are computed once, after all data is collected. Empty input
# yields 0.0 instead of raising ZeroDivisionError / NameError.
# The sentence average is kept under its own name; it is not printed, which
# matches the original output.
average_sentence_total = (
    sum(average_list) / len(average_list) if average_list else 0.0
)
average_total = sum(list_average) / len(list_average) if list_average else 0.0

print(f"Average Length of Pairs:  {average_total}")
print(f"Conversation 0:  {top_count}  \nEmbedding Dimension Min:  {second_count}")
print(f"Max Sequence Length:  {top_list}")
print(f"Max Word in Sentence Count:  {max(average_list, default=0)}")