# Hugging Face Space file (size: 1,292 bytes, revision d86a872)
import mysql.connector
from datasets import load_dataset
from huggingface_hub import login
import config
def seed():
    """Copy the sentences of the configured Hugging Face dataset into MySQL.

    Logs in to the Hugging Face Hub with ``config.hf_token``, loads the
    ``train`` split of ``config.hf_tts_ds_repo``, creates the ``tts_data``
    table if it does not exist, and inserts one ``(filename, sentence)`` row
    per dataset item. Filenames are synthesized as ``sample_<index>.wav``
    from the item's position in the dataset. Rows are inserted and committed
    in batches of 1000.

    Returns:
        The string ``"done!"`` once every row has been committed.

    Raises:
        Whatever the Hub login, dataset loading, or MySQL driver raise; the
        cursor and connection are closed before the exception propagates.
        Batches committed before a failure stay committed.
    """
    login(token=config.hf_token)
    # NOTE(review): trust_remote_code=True lets the dataset repository run its
    # own loading script locally -- only point this at a repository you trust.
    dataset = load_dataset(config.hf_tts_ds_repo, split="train", trust_remote_code=True)
    print(dataset.column_names)
    print(dataset[0])

    # mysql.connector.connect() takes its settings as keyword arguments, so a
    # dict of settings has to be unpacked rather than passed positionally.
    db_config = config.db_config
    if isinstance(db_config, dict):
        conn = mysql.connector.connect(**db_config)
    else:
        # NOTE(review): db_config is not a dict; passed through unchanged as
        # the original code did -- confirm the driver accepts this form.
        conn = mysql.connector.connect(db_config)

    insert_sql = "INSERT INTO tts_data (filename, sentence) VALUES (%s, %s)"
    batch_size = 1000

    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS tts_data (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    filename VARCHAR(255),
                    sentence TEXT
                )
                """
            )

            batch = []
            for i, item in enumerate(dataset):
                batch.append((f"sample_{i}.wav", item["sentence"]))
                if len(batch) == batch_size:
                    cursor.executemany(insert_sql, batch)
                    conn.commit()
                    print(f"✅ {i + 1} records saved!")
                    batch = []

            # Flush the final, partially filled batch.
            if batch:
                cursor.executemany(insert_sql, batch)
                conn.commit()
                print(f"✅ last {len(batch)} records saved.")
        finally:
            cursor.close()
    finally:
        # Release the connection even when an insert or commit fails.
        conn.close()

    return "done!"
|