import streamlit as st
from huggingface_hub import InferenceClient
import re
import edge_tts
import asyncio
from concurrent.futures import ThreadPoolExecutor
import tempfile
from pydub import AudioSegment

# Initialize Hugging Face InferenceClient
client_hf = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

async def text_to_speech_edge(text, language_code):
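    # Synthesize `text` with Edge TTS and return the path to a temporary MP3 file.
    # Only French ("fr") is currently mapped to a voice.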
    voice = {"fr": "fr-FR-RemyMultilingualNeural"}[language_code]
    communicate = edge_tts.Communicate(text, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path

def run_in_threadpool(func, *args, **kwargs):
    # Run an async coroutine to completion from a worker thread.
    # asyncio.run creates and tears down a fresh event loop, avoiding the
    # loop leak of calling new_event_loop() without ever closing it.
    return asyncio.run(func(*args, **kwargs))

def concatenate_audio(paths):
    # Join the per-sentence MP3 clips into a single audio file
    combined = AudioSegment.empty()
    for path in paths:
        combined += AudioSegment.from_mp3(path)
    # NamedTemporaryFile replaces the deprecated, insecure tempfile.mktemp
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        combined_path = tmp_file.name
    combined.export(combined_path, format="mp3")
    return combined_path

def dictee_to_audio_segmented(dictee):
    # Generate one audio clip per sentence. Submitting all tasks before
    # collecting results lets the TTS calls run in parallel while the
    # returned paths keep the original sentence order.
    sentences = segmenter_texte(dictee)
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(run_in_threadpool, text_to_speech_edge, replace_punctuation(sentence), "fr")
            for sentence in sentences
        ]
        audio_urls = [future.result() for future in futures]
    return audio_urls

def generer_dictee(classe, longueur):
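    # Generate the dictation text with Mixtral via the Hugging Face Inference API,
    # streaming tokens and concatenating them into a single string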
    prompt = f"Créer une dictée pour la classe {classe} d'une longueur d'environ {longueur} mots."
    generate_kwargs = {
        "temperature": 0.7,
        "max_new_tokens": 1000,
        "top_p": 0.95,
        "repetition_penalty": 1.2,
        "do_sample": True,
    }
    formatted_prompt = f"<s>[INST] {prompt} [/INST]"
    stream = client_hf.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    dictee = ""
    for response in stream:
        dictee += response.token.text
    dictee = dictee.replace("</s>", "").strip()
    return dictee

def replace_punctuation(text):
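    # Spell out punctuation marks in French so they are read aloud by the TTS,
    # as is conventional when dictating a text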
    replacements = {
        ".": " point.",
        ",": " virgule,",
        ";": " point-virgule;",
        ":": " deux-points:",
        "!": " point d'exclamation!",
        "?": " point d'interrogation?",
    }
    for key, value in replacements.items():
        text = text.replace(key, value)
    return text

def segmenter_texte(texte):
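    # Split the text into sentences at terminal punctuation followed by a space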
    sentences = re.split(r'(?<=[.!?]) +', texte)
    return sentences

st.set_page_config(layout="wide")
st.title('Générateur de Dictée')

with st.expander("Paramètres de la dictée", expanded=True):
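    # Dictation parameters: training mode, grade level, and target word count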
    mode = st.radio("Mode:", ["S'entrainer", "Entrainer"])
    classe = st.selectbox("Classe", ["CP", "CE1", "CE2", "CM1", "CM2", "6ème", "5ème", "4ème", "3ème", "Seconde", "Premiere", "Terminale"], index=2)
    longueur = st.slider("Longueur de la dictée (nombre de mots)", 50, 500, 200)
    
if st.button('Générer la Dictée'):
    with st.spinner("Génération de la dictée en cours..."):
        # Persist the generated dictation (and its audio) in session_state so the
        # nested "Correction" button does not make them disappear on rerun
        st.session_state['dictee'] = generer_dictee(classe, longueur)
        if mode == "S'entrainer":
            audio_urls = dictee_to_audio_segmented(st.session_state['dictee'])
            st.session_state['audio_path'] = concatenate_audio(audio_urls)
        else:
            st.session_state.pop('audio_path', None)

if 'dictee' in st.session_state:
    if mode == "S'entrainer" and 'audio_path' in st.session_state:
        col1, col2 = st.columns(2)
        with col1:
            st.audio(st.session_state['audio_path'], format='audio/mp3')
        with col2:
            # The widget key keeps the user's input in st.session_state across reruns
            user_input = st.text_area("Écrivez la dictée ici:", height=300, key='user_input')
            if st.button('Correction'):
                st.write("Dictée originale:")
                st.text(st.session_state['dictee'])
                # Add detailed comparison/correction logic here if needed
    elif mode == "Entrainer":
        st.text_area("Voici votre dictée :", st.session_state['dictee'], height=300)