Update goai_helpers/utils.py
Browse files- goai_helpers/utils.py +115 -0
goai_helpers/utils.py
CHANGED
@@ -72,3 +72,118 @@ def diviser_phrases_moore(texte: str) -> list:
|
|
72 |
|
73 |
return phrases
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
return phrases
|
74 |
|
75 |
+
|
76 |
+
class MooreConverter:
    """Convert Arabic numerals into their Mooré textual representation.

    The converter spells out non-negative integers (`number_to_moore`) and
    can rewrite every standalone run of digits inside a text
    (`expand_number`).
    """

    # Contracted forms of 1, 2 and 3, used after the connector "la a"
    # (and, for 3, after the hundreds prefix).
    _SHORT_UNITS = {"yembo": "ye", "yiibu": "yi", "tãabo": "tã"}

    def __init__(self):
        # Index i holds the word for digit i; index 0 is intentionally empty.
        self.units = ["", "yembo", "yiibu", "tãabo", "naase", "nu", "yoobe", "yopoe", "nii", "wae"]
        # Index i holds the word for i * 10.
        self.tens_base = ["", "piiga", "pisi", "pis-tã", "pis-naase", "pis-nu", "pis-yoobe", "pis-yopoe", "pis-nii", "pis-wae"]
        # 100 has its own word; 200..900 are built from `hundreds_prefix`.
        self.hundreds = ["", "koabga"]
        self.hundreds_prefix = "kobs-"
        # 1000 has its own word; larger thousands are built recursively.
        self.thousands = ["", "tusri"]

    def _short_unit(self, unit_text: str) -> str:
        """Return the contracted form of a unit word, or the word unchanged."""
        return self._SHORT_UNITS.get(unit_text, unit_text)

    def number_to_moore(self, n: int) -> str:
        """Spell out a non-negative integer in Mooré.

        Args:
            n: The integer to convert; must be >= 0.

        Returns:
            The Mooré wording of ``n``. Zero yields an empty string (callers
            such as `expand_number` then fall back to the digits).

        Raises:
            ValueError: If ``n`` is negative. Without this guard small
                negatives would silently index ``self.units`` from the end
                and produce the word for a different number.
        """
        if n < 0:
            raise ValueError(f"number_to_moore expects a non-negative integer, got {n!r}")

        if n == 0:
            return ""

        if n < 10:
            return self.units[n]

        if n == 10:
            return self.tens_base[1]

        if n < 20:
            # 11..19 use the shortened stem "piig" followed by the unit.
            return "piig la a " + self._short_unit(self.units[n - 10])

        if n < 100:
            tens_part = self.tens_base[n // 10]
            unit_digit = n % 10
            if unit_digit == 0:
                return tens_part
            return tens_part + " la a " + self._short_unit(self.units[unit_digit])

        if n < 1000:
            hundreds_count, remainder = divmod(n, 100)

            if hundreds_count == 1:
                result = self.hundreds[1]
            else:
                # hundreds_count is 2..9 here, so indexing units is safe.
                unit_name = self.units[hundreds_count]
                # NOTE(review): 200 is rendered "kobs-yiibu" (full form) while
                # 300 is "kobs-tã" (short form). This mirrors the previous
                # behaviour exactly; confirm the intended wording for 200.
                if unit_name != "yiibu":
                    unit_name = self._short_unit(unit_name)
                result = self.hundreds_prefix + unit_name

            if remainder > 0:
                result += " la " + self.number_to_moore(remainder)
            return result

        if n < 1_000_000:
            thousands_count, remainder = divmod(n, 1000)

            if thousands_count == 1:
                result = self.thousands[1]
            else:
                # thousands_count can be up to 999, so spell it recursively
                # and append the word for "thousand".
                result = self.number_to_moore(thousands_count) + " " + self.thousands[1]

            if remainder > 0:
                result += " la " + self.number_to_moore(remainder)
            return result

        # Millions and above.
        millions_count, remainder = divmod(n, 1_000_000)
        result = self.number_to_moore(millions_count) + " milyɔɔng"
        if remainder > 0:
            # NOTE(review): the remainder is joined with a plain space here,
            # whereas hundreds/thousands use " la " — kept as-is; confirm.
            result += " " + self.number_to_moore(remainder)
        return result

    def expand_number(self, text: str) -> str:
        """Replace standalone digit runs in ``text`` with Mooré words.

        Args:
            text: The text to process. Non-string values are returned
                unchanged.

        Returns:
            ``text`` with every ``\\b\\d+\\b`` match replaced by its Mooré
            wording. A match is left as digits when the conversion yields an
            empty string (e.g. ``0``) or when the number cannot be converted.
        """
        if not isinstance(text, str):
            return text

        number_pattern = re.compile(r'\b\d+\b')

        def replace_number_with_text(match):
            digits = match.group()
            try:
                moore = self.number_to_moore(int(digits))
            except (ValueError, RecursionError):
                # int() rejects extremely long digit strings on recent Python
                # versions, and very large values can exhaust the recursion
                # limit; in both cases keep the original digits.
                return digits
            # An empty conversion (e.g. for 0) falls back to the digits.
            return moore if moore else digits

        return number_pattern.sub(replace_number_with_text, text)
|