Spaces:
Running
on
Zero
Running
on
Zero
import os | |
import re | |
import time | |
import torch | |
import spaces | |
import requests | |
import tempfile | |
import concurrent | |
import numpy as np | |
from tqdm import tqdm | |
from huggingface_hub import hf_hub_download, hf_hub_url, login | |
from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer | |
from TTS.tts.configs.xtts_config import XttsConfig | |
from TTS.tts.models.xtts import Xtts | |
def download_file(url: str, destination: str, token: str = None):
    """
    Download a file from a URL while displaying a progress bar.

    Supports Hugging Face API tokens for protected models: when a token is
    available it is sent as a ``Bearer`` value in the ``Authorization`` header.

    :param url: The URL to download from.
    :param destination: Path where the downloaded file is written.
    :param token: Hugging Face API token (optional). When not provided, the
        HF_SPACE_TOKEN environment variable is used if it is set.
    :raises requests.HTTPError: If the server answers with a 4xx/5xx status.
        In that case ``destination`` is not created or overwritten.
    """
    # Use the token passed in, or fall back to the environment variable.
    if token is None:
        token = os.getenv("HF_SPACE_TOKEN")

    # Request headers; only authenticate when a token is actually available.
    headers = {}
    if token:
        headers['Authorization'] = f'Bearer {token}'

    # Streaming GET; the context manager releases the connection even if the
    # download is interrupted part-way through.
    with requests.get(url, stream=True, headers=headers) as response:
        # Fail loudly on HTTP errors *before* opening the destination, so an
        # error page is never saved in place of the requested file.
        response.raise_for_status()

        # Total size in bytes; zero when the server does not report it.
        total_size = int(response.headers.get('content-length', 0))

        # Write chunk by chunk and advance the progress bar by bytes written.
        with open(destination, 'wb') as file, tqdm(desc=destination, total=total_size, unit='B', unit_scale=True,
                                                   unit_divisor=1024) as bar:
            for data in response.iter_content(chunk_size=1024):
                size = file.write(data)
                bar.update(size)
def diviser_phrases_moore(texte: str) -> list:
    """
    Split a text into sentences at sentence-ending punctuation.

    A sentence boundary is any run of whitespace that directly follows one of
    '.', '!' or '?'. The punctuation mark stays attached to the sentence it
    ends. Each piece is stripped of surrounding whitespace and empty pieces
    are dropped.

    Args:
        texte (str): The text to split into sentences.
    Returns:
        list: The cleaned, non-empty sentences in their original order.
    """
    # Split on whitespace preceded by sentence-ending punctuation.
    morceaux = re.split(r'(?<=[.!?])\s+', texte)

    # Trim each piece and keep only those that still contain something.
    resultat = []
    for morceau in morceaux:
        nettoye = morceau.strip()
        if nettoye:
            resultat.append(nettoye)
    return resultat
class MooreConverter:
    """
    Convert Arabic numerals into Mooré textual representation.

    ``number_to_moore`` spells out a single integer; ``expand_number``
    replaces every standalone run of digits inside a text.
    """

    # Standalone digit runs; compiled once rather than on every call.
    _NUMBER_PATTERN = re.compile(r'\b\d+\b')

    # Shortened unit words used after "la a" in compound numbers (11-99).
    _SHORT_UNITS = {"yembo": "ye", "yiibu": "yi", "tãabo": "tã"}

    def __init__(self):
        self.units = ["", "yembo", "yiibu", "tãabo", "naase", "nu", "yoobe", "yopoe", "nii", "wae"]
        self.tens_base = ["", "piiga", "pisi", "pis-tã", "pis-naase", "pis-nu", "pis-yoobe", "pis-yopoe", "pis-nii", "pis-wae"]
        self.hundreds = ["", "koabga"]
        self.hundreds_prefix = "kobs-"
        self.thousands = ["", "tusri"]

    def _short_unit(self, digit: int) -> str:
        """Return the unit word for ``digit`` (1-9), shortened where a short form exists."""
        word = self.units[digit]
        return self._SHORT_UNITS.get(word, word)

    def number_to_moore(self, n: int, is_price: bool = False) -> str:
        """
        Spell out ``n`` in Mooré.

        Args:
            n: The number to convert. Negative values are not handled
                specially by this method.
            is_price: When True, ``n`` is divided by 5 (rounding down) before
                conversion. NOTE(review): presumably because prices are
                counted in units of five — confirm with the author.
        Returns:
            The Mooré wording, or an empty string when the value to spell out
            is zero (including a price below 5).
        """
        if is_price:
            # Floor division stays in exact integer arithmetic: true division
            # goes through a float, which loses precision above 2**53 and
            # raises OverflowError for very large integers.
            n = n // 5
        n = int(n)

        if n == 0:
            # Callers such as expand_number treat "" as "no conversion".
            return ""
        if n < 10:
            return self.units[n]
        if n == 10:
            return self.tens_base[1]
        if n < 20:
            # Teens use the clipped stem "piig" rather than "piiga".
            return "piig la a " + self._short_unit(n - 10)
        if n < 100:
            tens_part = self.tens_base[n // 10]
            digit = n % 10
            if digit == 0:
                return tens_part
            return tens_part + " la a " + self._short_unit(digit)
        if n < 1000:
            hundreds_count, remainder = divmod(n, 100)
            if hundreds_count == 1:
                result = self.hundreds[1]
            else:
                # hundreds_count is 2..9 here. Only "tãabo" is shortened after
                # the hundreds prefix; "yiibu" keeps its full form.
                unit_name = self.units[hundreds_count]
                if unit_name == "tãabo":
                    unit_name = "tã"
                result = self.hundreds_prefix + unit_name
            if remainder > 0:
                result += " la " + self.number_to_moore(remainder)
            return result
        if n < 1_000_000:
            thousands_count, remainder = divmod(n, 1000)
            if thousands_count == 1:
                result = self.thousands[1]
            else:
                # The thousands word comes first, followed by the recursively
                # spelled count (which may itself be >= 10). The word is
                # "tusa" for 2-9 thousand and "tus" from ten thousand upward.
                thousands_word = "tus" if n >= 10000 else "tusa"
                result = thousands_word + " " + self.number_to_moore(thousands_count)
            if remainder > 0:
                result += " la " + self.number_to_moore(remainder)
            return result

        # Millions and above: the count is spelled recursively, so arbitrarily
        # large values are supported.
        millions_count, remainder = divmod(n, 1_000_000)
        result = self.number_to_moore(millions_count) + " milyɔɔng"
        if remainder > 0:
            result += " " + self.number_to_moore(remainder)
        return result

    def expand_number(self, text: str) -> str:
        """
        Replace plain numbers (e.g. '123') with Mooré words.

        Every number is converted with ``is_price=True``, i.e. divided by 5
        first. If the conversion yields an empty string (e.g. for 0, or any
        value below 5) the original digits are kept. Non-string input is
        returned unchanged.
        """
        if not isinstance(text, str):
            return text

        def replace_number_with_text(match):
            digits = match.group()
            try:
                moore = self.number_to_moore(int(digits), True)
            except Exception:
                # Deliberate best effort: on any unexpected error, keep the
                # digits rather than failing the whole text.
                return digits
            # An empty conversion (e.g. zero) also falls back to the digits.
            return moore if moore else digits

        return self._NUMBER_PATTERN.sub(replace_number_with_text, text)
def mark_numbers(text):
    """
    Wrap every standalone run of digits in asterisks.

    For example ``"25 ans"`` becomes ``"*25* ans"``. Digits that are glued to
    other word characters (e.g. ``"a1"``) are left untouched.
    """
    digit_run = re.compile(r'\b(\d+)\b')
    return digit_run.sub(r'*\1*', text)
def unmark_numbers(text):
    """
    Remove every asterisk from ``text``.

    Note that all ``*`` characters are removed, not only those wrapped around
    numbers.
    """
    pieces = text.split("*")
    return "".join(pieces)