File size: 1,292 Bytes
d86a872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import mysql.connector
from datasets import load_dataset
from huggingface_hub import login
import config


def _insert_batch(conn, cursor, rows):
    """Insert ``rows`` of ``(filename, sentence)`` into ``tts_data`` and commit."""
    cursor.executemany(
        "INSERT INTO tts_data (filename, sentence) VALUES (%s, %s)", rows
    )
    conn.commit()


def seed():
    """Seed the ``tts_data`` MySQL table from the configured Hugging Face dataset.

    Logs in with ``config.hf_token``, loads the ``train`` split of
    ``config.hf_tts_ds_repo``, creates the ``tts_data`` table if it does not
    exist, and inserts one row per dataset item in batches of 1000. Each row
    holds a generated filename (``sample_<index>.wav``) and the item's
    ``"sentence"`` field. Every batch is committed on its own, so a failure
    part-way through leaves the previously committed batches in place.

    Returns:
        The string ``"done!"`` once all rows have been inserted.

    Raises:
        Whatever ``login``, ``load_dataset`` or the MySQL connector raise; the
        cursor and connection are closed before the exception propagates.
    """
    login(token=config.hf_token)
    # NOTE(review): trust_remote_code=True lets the dataset repo run its own
    # loading script on this machine; confirm the repo is trusted.
    dataset = load_dataset(config.hf_tts_ds_repo, split="train", trust_remote_code=True)

    print(dataset.column_names)
    # Guard the preview so an empty split does not raise IndexError.
    if len(dataset) > 0:
        print(dataset[0])

    # Connection settings must reach connect() as keyword arguments; a dict
    # passed positionally is rejected by the connector.
    db_config = config.db_config
    if isinstance(db_config, dict):
        conn = mysql.connector.connect(**db_config)
    else:
        # Not a dict: keep the original call shape unchanged.
        conn = mysql.connector.connect(db_config)

    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                """
    CREATE TABLE IF NOT EXISTS tts_data (
        id INT AUTO_INCREMENT PRIMARY KEY,
        filename VARCHAR(255),
        sentence TEXT
    )
    """
            )

            batch_size = 1000
            batch = []

            for i, item in enumerate(dataset):
                batch.append((f"sample_{i}.wav", item["sentence"]))

                if len(batch) == batch_size:
                    _insert_batch(conn, cursor, batch)
                    print(f"βœ… {i + 1} records saved!")
                    batch = []

            # Flush the final, partially filled batch.
            if batch:
                _insert_batch(conn, cursor, batch)
                print(f"βœ… last {len(batch)} records saved.")
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when seeding fails midway.
        conn.close()

    return "done!"