ArissBandoss committed on
Commit
314a751
·
verified ·
1 Parent(s): 4baecca

Update goai_helpers/utils.py

Browse files
Files changed (1) hide show
  1. goai_helpers/utils.py +115 -0
goai_helpers/utils.py CHANGED
@@ -72,3 +72,118 @@ def diviser_phrases_moore(texte: str) -> list:
72
 
73
  return phrases
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  return phrases
74
 
75
+
76
class MooreConverter:
    """Convert Arabic numerals into their Mooré textual representation.

    ``number_to_moore`` spells out a single non-negative integer and
    ``expand_number`` rewrites every standalone run of digits inside a text.
    """

    # Short forms taken by 1, 2 and 3 when they follow the " la a " connector
    # (e.g. 21 -> "pisi la a ye").
    _CONTRACTED_UNITS = {"yembo": "ye", "yiibu": "yi", "tãabo": "tã"}

    # Standalone runs of digits delimited by word boundaries.
    _NUMBER_PATTERN = r'\b\d+\b'

    def __init__(self):
        # Index i holds the word for digit i; index 0 is intentionally empty.
        self.units = ["", "yembo", "yiibu", "tãabo", "naase", "nu", "yoobe", "yopoe", "nii", "wae"]
        # Index i holds the word for i * 10.
        self.tens_base = [
            "", "piiga", "pisi", "pis-tã", "pis-naase", "pis-nu",
            "pis-yoobe", "pis-yopoe", "pis-nii", "pis-wae",
        ]
        self.hundreds = ["", "koabga"]
        # Prefix used for 200..900, followed by the multiplier word.
        self.hundreds_prefix = "kobs-"
        self.thousands = ["", "tusri"]
        # No thousands prefix: multiples of a thousand are built by recursion.

    def _contracted_unit(self, digit: int) -> str:
        """Return the word for ``digit`` (1..9) as used after " la a "."""
        word = self.units[digit]
        return self._CONTRACTED_UNITS.get(word, word)

    def number_to_moore(self, n: int) -> str:
        """Spell out the non-negative integer ``n`` in Mooré.

        Args:
            n: The number to convert.

        Returns:
            The Mooré words for ``n``. Zero yields an empty string, which
            ``expand_number`` treats as "keep the original digits".

        Raises:
            ValueError: If ``n`` is negative. Without this guard a negative
                value would index the word tables from the end and return
                an unrelated word.
        """
        if n < 0:
            raise ValueError(f"cannot convert a negative number to Mooré: {n}")

        if n == 0:
            return ""

        if n < 10:
            return self.units[n]

        if n < 100:
            tens, unit = divmod(n, 10)
            if unit == 0:
                return self.tens_base[tens]
            # 11..19 are built on the shortened stem "piig" rather than "piiga".
            stem = "piig" if tens == 1 else self.tens_base[tens]
            return f"{stem} la a {self._contracted_unit(unit)}"

        if n < 1000:
            count, remainder = divmod(n, 100)
            if count == 1:
                result = self.hundreds[1]
            else:
                # As a hundreds multiplier only 3 is shortened; 2 keeps its
                # full form ("kobs-yiibu").
                multiplier = self.units[count]
                if multiplier == "tãabo":
                    multiplier = "tã"
                result = self.hundreds_prefix + multiplier
            if remainder:
                result += " la " + self.number_to_moore(remainder)
            return result

        if n < 1_000_000:
            count, remainder = divmod(n, 1000)
            if count == 1:
                result = self.thousands[1]
            else:
                # The multiplier can be any value up to 999, so spell it out
                # recursively and append "tusri".
                result = self.number_to_moore(count) + " " + self.thousands[1]
            if remainder:
                result += " la " + self.number_to_moore(remainder)
            return result

        # Millions and above: the remainder is joined with a plain space.
        count, remainder = divmod(n, 1_000_000)
        result = self.number_to_moore(count) + " milyɔɔng"
        if remainder:
            result += " " + self.number_to_moore(remainder)
        return result

    def expand_number(self, text: str) -> str:
        """Replace plain numbers (e.g. '123') in ``text`` with Mooré words.

        Args:
            text: The text to process. Non-string values are returned
                unchanged.

        Returns:
            ``text`` with each standalone run of digits spelled out. A run is
            left as digits when its conversion is empty (zero) or fails.
        """
        if not isinstance(text, str):
            return text

        def replace_number_with_text(match):
            digits = match.group()
            try:
                moore = self.number_to_moore(int(digits))
            except (ValueError, RecursionError):
                # Best effort: absurdly long digit runs can exceed the int
                # parsing limit or the recursion limit; keep them as digits.
                return digits
            # An empty conversion (zero) also keeps the original digits.
            return moore if moore else digits

        return re.sub(self._NUMBER_PATTERN, replace_number_with_text, text)