Hyacinthax committed on
Commit
4ec6a0f
·
verified ·
1 Parent(s): 0bdb125

Upload processed_dlogs_length.py

Browse files

This file is an example of how I got my hyperparameters

Files changed (1) hide show
  1. processed_dlogs_length.py +60 -0
processed_dlogs_length.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Report dialog-length statistics that were used to choose hyperparameters.
#
# Reads ``dialog_data`` from ``preprocessed_dialogs`` (iterated with
# ``.items()``; each value is a sequence whose elements are indexed with
# ``[0]`` and ``[1]`` and split on spaces) and prints summary figures.


def count_words(sentence):
    """Return the number of space-separated pieces in *sentence*, plus one.

    The ``+ 1`` is carried over unchanged from the original script.
    NOTE(review): presumably headroom for a special token -- confirm.
    """
    return len(sentence.split(' ')) + 1


def compute_dialog_stats(dialogs):
    """Compute length statistics over a mapping of conversations.

    Args:
        dialogs: Mapping whose values are sequences of two-element pairs.
            Each pair element is expected to be a string; falsy elements
            (empty string, ``None``) are skipped when counting words.

    Returns:
        A dict with these keys:
            ``average_pairs``: mean number of pairs per conversation.
            ``average_words``: mean ``count_words`` value over all
                non-empty sentences.
            ``longest``: largest number of pairs in any conversation.
            ``second_longest``: second-largest *distinct* number of pairs.
            ``max_words``: largest ``count_words`` value seen.
        Every value is zero when there is nothing to measure, so empty
        input never raises.
    """
    pair_counts = [len(pairs) for pairs in dialogs.values()]

    word_counts = []
    for pairs in dialogs.values():
        # First elements of every pair, then second elements, as before.
        for side in (0, 1):
            word_counts.extend(
                count_words(pair[side]) for pair in pairs if pair[side]
            )

    # Rank distinct lengths so the runner-up is right regardless of the
    # order in which conversations are visited.
    distinct_counts = sorted(set(pair_counts), reverse=True)
    longest = distinct_counts[0] if distinct_counts else 0
    second_longest = distinct_counts[1] if len(distinct_counts) > 1 else 0

    return {
        "average_pairs": (
            sum(pair_counts) / len(pair_counts) if pair_counts else 0.0
        ),
        "average_words": (
            sum(word_counts) / len(word_counts) if word_counts else 0.0
        ),
        "longest": longest,
        "second_longest": second_longest,
        "max_words": max(word_counts, default=0),
    }


def main():
    """Load the preprocessed dialogs and print the statistics."""
    # Imported here so the helpers above can be used without the data module.
    from preprocessed_dialogs import dialog_data

    stats = compute_dialog_stats(dialog_data)
    average_total = stats["average_pairs"]
    top_count = stats["longest"]
    second_count = stats["second_longest"]
    # NOTE(review): the original tracked this separately as the longest
    # list of first elements, which always equals the longest conversation.
    top_list = top_count
    max_words = stats["max_words"]

    print(f"Average Length of Pairs: {average_total}")
    print(f"Conversation 0: {top_count} \nEmbedding Dimension Min: {second_count}")
    print(f"Max Sequence Length: {top_list}")
    print(f"Max Word in Sentence Count: {max_words}")


if __name__ == "__main__":
    main()