Spaces:
Paused
Paused
| # ssml_converter.py | |
| """ | |
| SSML (Speech Synthesis Markup Language) Converter | |
| """ | |
| import re | |
| from typing import Dict, Optional | |
| from datetime import datetime | |
| import xml.etree.ElementTree as ET | |
| from xml.sax.saxutils import escape | |
class SSMLConverter:
    """Convert plain text to SSML (Speech Synthesis Markup Language).

    The converter scans the text for currency amounts, times, dates,
    alphanumeric codes, numbers and pause markers (``...`` / ``--``) and
    wraps each one in the matching ``<say-as>`` / ``<break>`` element inside
    a ``<speak>`` root.  Everything else is emitted as plain text, in its
    original position.
    """

    # Insertion order matters: the patterns are joined into one alternation
    # and the regex engine tries them left to right, so the more specific
    # patterns (currency, time, date, code) must come before 'number'.
    _PATTERNS = {
        # Trailing \b keeps "5 EUROPEAN" from being read as "5 EUR".
        'currency': r'[₺$€£]\s*\d+(?:[.,]\d+)?|\d+(?:[.,]\d+)?\s*(?:TL|USD|EUR|GBP)\b',
        'time': r'\b\d{1,2}:\d{2}(?::\d{2})?\b',
        'date': r'\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}[./]\d{1,2}[./]\d{2,4}\b',
        'code': r'\b[A-Z]{2,5}\d{2,5}\b',
        'number': r'\b\d+(?:[.,]\d+)?\b',
        'pause': r'\.{3}|--',
    }

    # Compiled once for the whole class instead of on every call.
    _SEGMENT_RE = re.compile(
        '|'.join(f'(?P<{name}>{pattern})' for name, pattern in _PATTERNS.items())
    )

    _ISO_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')

    # Fixed <say-as> attributes per segment type.  The date "format"
    # attribute depends on the matched text and is added separately.
    _SAY_AS_ATTRS = {
        'number': {'interpret-as': 'cardinal'},
        'currency': {'interpret-as': 'currency'},
        'time': {'interpret-as': 'time', 'format': 'hms24'},
        'date': {'interpret-as': 'date'},
        'code': {'interpret-as': 'characters'},
    }

    def __init__(self, language: str = "tr-TR"):
        """Store the language tag written to the root ``xml:lang`` attribute."""
        self.language = language

    def convert_to_ssml(self, text: str, options: Optional[Dict] = None) -> str:
        """Convert plain text to an SSML document string.

        Args:
            text: The plain text to convert.
            options: Accepted for interface compatibility; not used by the
                current implementation.

        Returns:
            The serialized ``<speak>`` element.  Special characters in the
            plain text are XML-escaped exactly once, by the serializer.
        """
        return ET.tostring(self._build_tree(text), encoding='unicode', method='xml')

    def _build_tree(self, text: str) -> ET.Element:
        """Build the ``<speak>`` element tree for *text*, preserving order."""
        speak = ET.Element("speak")
        speak.set("version", "1.0")
        speak.set("xml:lang", self.language)

        # Most recently added child.  Plain text that follows a child has to
        # be stored in that child's ``tail``; appending it to ``speak.text``
        # would move it in front of every element.
        last_child = None

        for segment in self._segment_text(text):
            kind = segment["type"]

            if kind == "plain":
                # Whitespace-only runs between elements are dropped.
                if not segment["text"].strip():
                    continue
                # No manual escaping here: ET.tostring escapes text itself,
                # so escaping first would yield "&amp;amp;".
                if last_child is None:
                    speak.text = (speak.text or "") + segment["text"]
                else:
                    last_child.tail = (last_child.tail or "") + segment["text"]
                continue

            if kind == "pause":
                last_child = ET.SubElement(speak, "break")
                last_child.set("time", segment["duration"])
                continue

            attrs = self._SAY_AS_ATTRS.get(kind)
            if attrs is None:
                # Unknown segment types are ignored.
                continue

            last_child = ET.SubElement(speak, "say-as")
            for attr_name, attr_value in attrs.items():
                last_child.set(attr_name, attr_value)
            if kind == "date":
                last_child.set("format", self._date_format(segment["text"]))
            last_child.text = segment["text"]

        return speak

    def _date_format(self, date_text: str) -> str:
        """Return the say-as date ``format`` matching how *date_text* is written."""
        if self._ISO_DATE_RE.fullmatch(date_text):
            return "ymd"
        # The non-ISO date pattern always has the year last.  Day-first is
        # assumed except for US English, which writes the month first.
        return "mdy" if self.language == "en-US" else "dmy"

    def _segment_text(self, text: str) -> list:
        """Split *text* into typed segments for SSML processing.

        Returns:
            A list of dicts in source order.  Each dict has a ``'type'`` key
            (``'plain'``, ``'currency'``, ``'time'``, ``'date'``, ``'code'``,
            ``'number'`` or ``'pause'``).  Pause segments carry a
            ``'duration'`` (``'500ms'`` for ``...``, ``'1s'`` for ``--``);
            all other segments carry the matched ``'text'``.
        """
        segments = []
        last_end = 0

        for match in self._SEGMENT_RE.finditer(text):
            # Plain text between the previous match and this one.
            if match.start() > last_end:
                segments.append({
                    'type': 'plain',
                    'text': text[last_end:match.start()],
                })

            # Every alternative is a top-level named group with no nested
            # capturing groups, so lastgroup names the one that matched.
            type_name = match.lastgroup
            matched = match.group()
            if type_name == 'pause':
                segments.append({
                    'type': 'pause',
                    'duration': '500ms' if matched == '...' else '1s',
                })
            else:
                segments.append({'type': type_name, 'text': matched})

            last_end = match.end()

        # Trailing plain text after the last match.
        if last_end < len(text):
            segments.append({'type': 'plain', 'text': text[last_end:]})

        return segments

    def add_emphasis(self, text: str, words: list, level: str = "moderate") -> str:
        """Convert *text* to SSML and wrap occurrences of *words* in ``<emphasis>``.

        Only plain text is searched; the contents of ``<say-as>`` elements
        are left alone.  Matching is by literal substring and every
        occurrence is emphasized.

        Args:
            text: The plain text to convert.
            words: Strings to emphasize.  Empty strings are ignored.
            level: Value of the ``level`` attribute on each ``<emphasis>``.

        Returns:
            The serialized ``<speak>`` element.
        """
        root = self._build_tree(text)

        # Longest first so "bigger" wins over "big" at the same position.
        needles = sorted({word for word in words if word}, key=len, reverse=True)
        if needles:
            # The capturing group makes split() return
            # [before, word, between, word, ..., after].
            needle_re = re.compile(
                '(' + '|'.join(re.escape(word) for word in needles) + ')'
            )

            def split_text(plain):
                """Return (leading text, emphasis elements for the rest)."""
                pieces = needle_re.split(plain)
                wrapped = []
                for word, trailing in zip(pieces[1::2], pieces[2::2]):
                    emphasis = ET.Element("emphasis", {"level": level})
                    emphasis.text = word
                    emphasis.tail = trailing
                    wrapped.append(emphasis)
                return pieces[0], wrapped

            # Build the new child list separately and assign it at the end,
            # so the tree is never mutated while it is being walked.
            rebuilt = []
            if root.text:
                root.text, inserted = split_text(root.text)
                rebuilt.extend(inserted)
            for child in list(root):
                rebuilt.append(child)
                if child.tail:
                    child.tail, inserted = split_text(child.tail)
                    rebuilt.extend(inserted)
            root[:] = rebuilt

        return ET.tostring(root, encoding='unicode', method='xml')