first commit
Browse files- .gitignore +45 -0
- README.md +51 -13
- app.py +38 -0
- bpe_tok.ipynb +1750 -0
- bpe_tok.model +3 -0
- bpe_tok.vocab +3256 -0
- requirements.txt +3 -0
- tokenizer.py +91 -0
.gitignore
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.so
|
6 |
+
.Python
|
7 |
+
build/
|
8 |
+
develop-eggs/
|
9 |
+
dist/
|
10 |
+
downloads/
|
11 |
+
eggs/
|
12 |
+
.eggs/
|
13 |
+
lib/
|
14 |
+
lib64/
|
15 |
+
parts/
|
16 |
+
sdist/
|
17 |
+
var/
|
18 |
+
wheels/
|
19 |
+
*.egg-info/
|
20 |
+
.installed.cfg
|
21 |
+
*.egg
|
22 |
+
|
23 |
+
# Virtual Environment
|
24 |
+
venv/
|
25 |
+
ENV/
|
26 |
+
env/
|
27 |
+
virtual/
|
28 |
+
|
29 |
+
# IDE
|
30 |
+
.idea/
|
31 |
+
.vscode/
|
32 |
+
*.swp
|
33 |
+
*.swo
|
34 |
+
|
35 |
+
# OS
|
36 |
+
.DS_Store
|
37 |
+
.DS_Store?
|
38 |
+
._*
|
39 |
+
.Spotlight-V100
|
40 |
+
.Trashes
|
41 |
+
ehthumbs.db
|
42 |
+
Thumbs.db
|
43 |
+
|
44 |
+
# Project specific
|
45 |
+
tok_*.txt
|
README.md
CHANGED
@@ -1,13 +1,51 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# BPE Tokenization with Custom Regex
|
2 |
+
|
3 |
+
This notebook demonstrates the process of training a Byte Pair Encoding (BPE) tokenizer using a custom regex pattern. The tokenizer is designed to handle multilingual text, specifically English, Hindi, and Kannada. The notebook includes steps for tokenization, vocabulary building, and encoding/decoding of text.
|
4 |
+
|
5 |
+
## Overview
|
6 |
+
|
7 |
+
The notebook is structured into several key sections:
|
8 |
+
|
9 |
+
1. **Custom Regex for BPE Tokenization**:
|
10 |
+
- A custom regex pattern is defined to tokenize text in English, Hindi, and Kannada. This pattern is used to split text into tokens that are then processed by the BPE algorithm.
|
11 |
+
|
12 |
+
2. **Dataset Loading**:
|
13 |
+
- Datasets from Hugging Face are loaded for English, Hindi (Devanagari script), and Kannada. These datasets are used to create a corpus for training the tokenizer.
|
14 |
+
|
15 |
+
3. **Corpus Preparation**:
|
16 |
+
- Texts from the datasets are concatenated into a single corpus, which is then saved to a file. This corpus serves as the input for training the BPE tokenizer.
|
17 |
+
|
18 |
+
4. **Utility Functions**:
|
19 |
+
- Functions are defined to handle control characters, visualize tokens, and manage token rendering.
|
20 |
+
|
21 |
+
5. **Training BPE**:
|
22 |
+
- The BPE algorithm is trained on the prepared corpus. The process involves iteratively merging the most frequent pairs of tokens until the desired vocabulary size is reached.
|
23 |
+
|
24 |
+
6. **Vocabulary and Model Saving**:
|
25 |
+
- The trained vocabulary and model are saved to disk for later use. The final vocabulary consists of 3257 tokens:
|
26 |
+
- 256 byte-level tokens
|
27 |
+
- 3000 merge operations
|
28 |
+
- 1 special `<|endoftext|>` token
|
29 |
+
|
30 |
+
7. **Encoding and Decoding**:
|
31 |
+
- Functions are provided to encode text into token IDs and decode token IDs back into text. Special tokens are handled as part of this process.
|
32 |
+
|
33 |
+
8. **Testing**:
|
34 |
+
- The tokenizer is tested on sample texts to verify its performance and compression ratio.
|
35 |
+
|
36 |
+
## Key Details
|
37 |
+
|
38 |
+
- **Vocabulary Size**: The final vocabulary size is set to 3257 tokens (256 byte tokens + 3000 merges + 1 `<|endoftext|>` token).
|
39 |
+
- **Tokenizer Training Corpus Composition**: The training corpus is constructed by combining texts from multiple datasets with the following distribution:
|
40 |
+
- `HuggingFaceFW/fineweb-edu` (English): 60% of the corpus, aligning with the token distribution patterns observed in advanced language models like GPT-4, where English tokens constitute a significant majority
|
41 |
+
- `ai4bharat/sangraha` (Hindi - Devanagari script): 20% of the corpus
|
42 |
+
- `ai4bharat/sangraha` (Kannada - Kannada script): 20% of the corpus
|
43 |
+
- **Compression Ratio**: The BPE tokenizer achieves a compression ratio of approximately 4.07x on the training corpus (UTF-8 bytes of the regex-split chunks divided by the number of tokens after merging), indicating how much the tokenization shortens the text representation.
|
44 |
+
|
45 |
+
## Usage
|
46 |
+
|
47 |
+
To use the tokenizer, load the saved model and vocabulary files, and utilize the provided encoding and decoding functions to process text. The tokenizer is capable of handling multilingual text and special tokens, making it suitable for diverse applications.
|
48 |
+
|
49 |
+
## Conclusion
|
50 |
+
|
51 |
+
This notebook provides a comprehensive guide to training a BPE tokenizer with custom regex patterns for multilingual text. The process includes dataset preparation, tokenization, vocabulary building, and model saving, offering a robust solution for text processing tasks.
|
app.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import tiktoken
|
3 |
+
from tokenizer import CustomTokenizer
|
4 |
+
|
5 |
+
# Initialize both tokenizers once at import time: the custom BPE tokenizer
# loaded from "bpe_tok.model" and tiktoken's GPT-2 encoding it is compared with.
|
6 |
+
custom_tokenizer = CustomTokenizer("bpe_tok.model")
|
7 |
+
tiktoken_encoder = tiktoken.get_encoding("gpt2")
|
8 |
+
|
9 |
+
def encode_text(text):
    """Tokenize *text* with both tokenizers and format the results for display.

    Args:
        text: The raw string entered in the Gradio textbox.

    Returns:
        A ``(custom_output, tiktoken_output)`` tuple of strings. Each string
        holds the token count followed by the encoded tokens produced by the
        custom BPE tokenizer and by tiktoken's GPT-2 encoding, respectively.
    """
    # Allow "<|endoftext|>" for BOTH tokenizers. tiktoken raises ValueError by
    # default when the input contains special-token text that was not
    # explicitly allowed, which would break the bundled example that ends in
    # "<|endoftext|>" and make the comparison asymmetric.
    allowed_special = {"<|endoftext|>"}

    # Get encodings from both tokenizers
    custom_tokens = custom_tokenizer.encode(text, allowed_special=allowed_special)
    tiktoken_tokens = tiktoken_encoder.encode(text, allowed_special=allowed_special)

    # Format output
    custom_output = f"Token count: {len(custom_tokens)}\nTokens: {custom_tokens}"
    tiktoken_output = f"Token count: {len(tiktoken_tokens)}\nTokens: {tiktoken_tokens}"

    return custom_output, tiktoken_output
|
19 |
+
|
20 |
+
# Create Gradio interface: one multi-line input textbox feeds encode_text, and
# its two returned strings fill the two output textboxes (custom tokenizer
# first, tiktoken second). The examples cover mixed Hindi/Kannada/English text
# and one English sentence ending in the "<|endoftext|>" special token.
|
21 |
+
iface = gr.Interface(
|
22 |
+
fn=encode_text,
|
23 |
+
inputs=gr.Textbox(lines=5, label="Enter text to tokenize"),
|
24 |
+
outputs=[
|
25 |
+
gr.Textbox(label="Custom Tokenizer Output", lines=4),
|
26 |
+
gr.Textbox(label="Tiktoken Output", lines=4)
|
27 |
+
],
|
28 |
+
title="Tokenizer Comparison",
|
29 |
+
description="Compare custom BPE tokenizer with Tiktoken GPT-2 tokenizer",
|
30 |
+
examples=[
|
31 |
+
["आज तो बहुत थक गया हूँ, ಸ್ವಲ್ಪ विश्रಾಂತಿ ಬೇಕು।"],
|
32 |
+
["मौसम कितना अच्छा है! ನೀವೂ ಹೊರಗೆ ಬನ್ನಿ, let's enjoy together."],
|
33 |
+
["My name is Jeff Bezos, and I'm the owner of Amazon.<|endoftext|>"]
|
34 |
+
]
|
35 |
+
)
|
36 |
+
|
37 |
+
# Launch the interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
|
38 |
+
iface.launch()
|
bpe_tok.ipynb
ADDED
@@ -0,0 +1,1750 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"### Custom regex for BPE tokenization"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"# Goal: handle English (en), Hindi (hi), and Kannada (kn) text.\n",
|
17 |
+
"tests = [\n",
|
18 |
+
" \"आज तो बहुत थक गया हूँ, ಸ್ವಲ್ಪ विश्रಾಂತಿ ಬೇಕು।\",\n",
|
19 |
+
" \"मौसम कितना अच्छा है! ನೀವೂ ಹೊರಗೆ ಬನ್ನಿ, let's enjoy together.\",\n",
|
20 |
+
" \"स्वल्पा adjust करो, बैंगलोर का ट्रैफिक ऐसा ही है।\",\n",
|
21 |
+
" \"ನೀವು ಚಹಾ ಕುಡಿತೀರಾ? मुझे एक cup चाहिए।\",\n",
|
22 |
+
" \"आज का काम पूरा करो, ನಾಳೆ ಎಲ್ಲಿಂದ ಆರಂಭಿಸೋದು ನೋಡಿ।\",\n",
|
23 |
+
" \"ಪಾರ್ಟಿ ಹೇಗೆ ಇತ್ತು? मुझे तो बहुत मजा आया!\",\n",
|
24 |
+
" \"ನಮ್ಮ ಚೂರು ಸಹನಶೀಲತೆಯನ್ನು ತೋರಿಸಿ, ये थोड़ी देर का मसला है।\",\n",
|
25 |
+
" \"ಸಮಯ ನಿಲ್ಲುತ್ತಿಲ್ಲ, जिंदगी में स्वल्पा मज़ा भी जरूरी है।\",\n",
|
26 |
+
" \"My name is Jeff Bezos, and I'm the owner of Amazon.<|endoftext|>\"\n",
|
27 |
+
"]"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"cell_type": "code",
|
32 |
+
"execution_count": 2,
|
33 |
+
"metadata": {},
|
34 |
+
"outputs": [
|
35 |
+
{
|
36 |
+
"name": "stdout",
|
37 |
+
"output_type": "stream",
|
38 |
+
"text": [
|
39 |
+
"आज तो बहुत थक गया हूँ, ಸ್ವಲ್ಪ विश्रಾಂತಿ ಬೇಕು।\n",
|
40 |
+
"['आज', ' त', 'ो', ' बह', 'ुत', ' थक', ' गय', 'ा', ' ह', 'ूँ,', ' ಸ', '್ವಲ', '್ಪ', ' व', 'िश', '्र', 'ಾಂ', 'ತ', 'ಿ', ' ಬ', 'ೇಕ', 'ು।']\n",
|
41 |
+
"\n",
|
42 |
+
"मौसम कितना अच्छा है! ನೀವೂ ಹೊರಗೆ ಬನ್ನಿ, let's enjoy together.\n",
|
43 |
+
"['म', 'ौसम', ' क', 'ितन', 'ा', ' अच', '्छ', 'ा', ' ह', 'ै!', ' ನ', 'ೀವ', 'ೂ', ' ಹ', 'ೊರಗ', 'ೆ', ' ಬನ', '್ನ', 'ಿ,', ' let', \"'s\", ' enjoy', ' together', '.']\n",
|
44 |
+
"\n",
|
45 |
+
"स्वल्पा adjust करो, बैंगलोर का ट्रैफिक ऐसा ही है।\n",
|
46 |
+
"['स', '्वल', '्प', 'ा', ' adjust', ' कर', 'ो,', ' ब', 'ैं', 'गल', 'ोर', ' क', 'ा', ' ट', '्र', 'ैफ', 'िक', ' ऐस', 'ा', ' ह', 'ी', ' ह', 'ै।']\n",
|
47 |
+
"\n",
|
48 |
+
"ನೀವು ಚಹಾ ಕುಡಿತೀರಾ? मुझे एक cup चाहिए।\n",
|
49 |
+
"['ನ', 'ೀವ', 'ು', ' ಚಹ', 'ಾ', ' ಕ', 'ುಡ', 'ಿತ', 'ೀರ', 'ಾ?', ' म', 'ुझ', 'े', ' एक', ' cup', ' च', 'ाह', 'िए', '।']\n",
|
50 |
+
"\n",
|
51 |
+
"आज का काम पूरा करो, ನಾಳೆ ಎಲ್ಲಿಂದ ಆರಂಭಿಸೋದು ನೋಡಿ।\n",
|
52 |
+
"['आज', ' क', 'ा', ' क', 'ाम', ' प', 'ूर', 'ा', ' कर', 'ो,', ' ನ', 'ಾಳ', 'ೆ', ' ಎಲ', '್ಲ', 'ಿಂ', 'ದ', ' ಆರ', 'ಂಭ', 'ಿಸ', 'ೋದ', 'ು', ' ನ', 'ೋಡ', 'ಿ।']\n",
|
53 |
+
"\n",
|
54 |
+
"ಪಾರ್ಟಿ ಹೇಗೆ ಇತ್ತು? मुझे तो बहुत मजा आया!\n",
|
55 |
+
"['ಪ', 'ಾರ', '್ಟ', 'ಿ', ' ಹ', 'ೇಗ', 'ೆ', ' ಇತ', '್ತ', 'ು?', ' म', 'ुझ', 'े', ' त', 'ो', ' बह', 'ुत', ' मज', 'ा', ' आय', 'ा!']\n",
|
56 |
+
"\n",
|
57 |
+
"ನಮ್ಮ ಚೂರು ಸಹನಶೀಲತೆಯನ್ನು ತೋರಿಸಿ, ये थोड़ी देर का मसला है।\n",
|
58 |
+
"['ನಮ', '್ಮ', ' ಚ', 'ೂರ', 'ು', ' ಸಹನಶ', 'ೀಲತ', 'ೆಯನ', '್ನ', 'ು', ' ತ', 'ೋರ', 'ಿಸ', 'ಿ,', ' य', 'े', ' थ', 'ोड', '़ी', ' द', 'ेर', ' क', 'ा', ' मसल', 'ा', ' ह', 'ै।']\n",
|
59 |
+
"\n",
|
60 |
+
"ಸಮಯ ನಿಲ್ಲುತ್ತಿಲ್ಲ, जिंदगी में स्वल्पा मज़ा भी जरूरी है।\n",
|
61 |
+
"['ಸಮಯ', ' ನ', 'ಿಲ', '್ಲ', 'ುತ', '್ತ', 'ಿಲ', '್ಲ', ',', ' ज', 'िं', 'दग', 'ी', ' म', 'ें', ' स', '्वल', '्प', 'ा', ' मज', '़ा', ' भ', 'ी', ' जर', 'ूर', 'ी', ' ह', 'ै।']\n",
|
62 |
+
"\n",
|
63 |
+
"My name is Jeff Bezos, and I'm the owner of Amazon.<|endoftext|>\n",
|
64 |
+
"['My', ' name', ' is', ' Jeff', ' Bezos', ',', ' and', ' I', \"'m\", ' the', ' owner', ' of', ' Amazon', '.<|', 'endoftext', '|>']\n",
|
65 |
+
"\n"
|
66 |
+
]
|
67 |
+
}
|
68 |
+
],
|
69 |
+
"source": [
|
70 |
+
"import regex as re\n",
|
71 |
+
"\n",
|
72 |
+
"def test_regex(regex, text):\n",
|
73 |
+
" print(text)\n",
|
74 |
+
" print(re.findall(regex, text))\n",
|
75 |
+
" print()\n",
|
76 |
+
"\n",
|
77 |
+
"# default gpt4 regex\n",
|
78 |
+
"pattern = r\"\"\"'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+\"\"\"\n",
|
79 |
+
"regex = re.compile(pattern)\n",
|
80 |
+
"\n",
|
81 |
+
"for test in tests:\n",
|
82 |
+
" test_regex(regex, test)"
|
83 |
+
]
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"cell_type": "code",
|
87 |
+
"execution_count": 3,
|
88 |
+
"metadata": {},
|
89 |
+
"outputs": [
|
90 |
+
{
|
91 |
+
"name": "stdout",
|
92 |
+
"output_type": "stream",
|
93 |
+
"text": [
|
94 |
+
"आज तो बहुत थक गया हूँ, ಸ್ವಲ್ಪ विश्रಾಂತಿ ಬೇಕು।\n",
|
95 |
+
"['आज', ' तो', ' बहुत', ' थक', ' गया', ' हूँ', ',', ' ಸ್ವಲ್ಪ', ' ಬೇಕು', '।']\n",
|
96 |
+
"\n",
|
97 |
+
"मौसम कितना अच्छा है! ನೀವೂ ಹೊರಗೆ ಬನ್ನಿ, let's enjoy together.\n",
|
98 |
+
"['मौसम', ' कितना', ' अच्छा', ' है', '!', ' ನೀವೂ', ' ಹೊರಗೆ', ' ಬನ್ನಿ', ',', ' let', \"'\", 's', ' enjoy', ' together', '.']\n",
|
99 |
+
"\n",
|
100 |
+
"स्वल्पा adjust करो, बैंगलोर का ट्रैफिक ऐसा ही है।\n",
|
101 |
+
"['स्वल्पा', ' adjust', ' करो', ',', ' बैंगलोर', ' का', ' ट्रैफिक', ' ऐसा', ' ही', ' है', '।']\n",
|
102 |
+
"\n",
|
103 |
+
"ನೀವು ಚಹಾ ಕುಡಿತೀರಾ? मुझे एक cup चाहिए।\n",
|
104 |
+
"['ನೀವು', ' ಚಹಾ', ' ಕುಡಿತೀರಾ', '?', ' मुझे', ' एक', ' cup', ' चाहिए', '।']\n",
|
105 |
+
"\n",
|
106 |
+
"आज का काम पूरा करो, ನಾಳೆ ಎಲ್ಲಿಂದ ಆರಂಭಿಸೋದು ನೋಡಿ।\n",
|
107 |
+
"['आज', ' का', ' काम', ' पूरा', ' करो', ',', ' ನಾಳೆ', ' ಎಲ್ಲಿಂದ', ' ಆರಂಭಿಸೋದು', ' ನೋಡಿ', '।']\n",
|
108 |
+
"\n",
|
109 |
+
"ಪಾರ್ಟಿ ಹೇಗೆ ಇತ್ತು? मुझे तो बहुत मजा आया!\n",
|
110 |
+
"['ಪಾರ್ಟಿ', ' ಹೇಗೆ', ' ಇತ್ತು', '?', ' मुझे', ' तो', ' बहुत', ' मजा', ' आया', '!']\n",
|
111 |
+
"\n",
|
112 |
+
"ನಮ್ಮ ಚೂರು ಸಹನಶೀಲತೆಯನ್ನು ತೋರಿಸಿ, ये थोड़ी देर का मसला है।\n",
|
113 |
+
"['ನಮ್ಮ', ' ಚೂರು', ' ಸಹನಶೀಲತೆಯನ್ನು', ' ತೋರಿಸಿ', ',', ' ये', ' थोड़ी', ' देर', ' का', ' मसला', ' है', '।']\n",
|
114 |
+
"\n",
|
115 |
+
"ಸಮಯ ನಿಲ್ಲುತ್ತಿಲ್ಲ, जिंदगी में स्वल्पा मज़ा भी जरूरी है।\n",
|
116 |
+
"['ಸಮಯ', ' ನಿಲ್ಲುತ್ತಿಲ್ಲ', ',', ' जिंदगी', ' में', ' स्वल्पा', ' मज़ा', ' भी', ' जरूरी', ' है', '।']\n",
|
117 |
+
"\n",
|
118 |
+
"My name is Jeff Bezos, and I'm the owner of Amazon.<|endoftext|>\n",
|
119 |
+
"['My', ' name', ' is', ' Jeff', ' Bezos', ',', ' and', ' I', \"'m\", ' the', ' owner', ' of', ' Amazon', '.', '<|', 'endoftext', '|>']\n",
|
120 |
+
"\n"
|
121 |
+
]
|
122 |
+
}
|
123 |
+
],
|
124 |
+
"source": [
|
125 |
+
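"# custom regex for en/hi/kn. NOTE(review): re.findall silently skips text that no alternative matches; in this cell's recorded output the mixed Devanagari/Kannada word of the first test sentence is missing - confirm this loss is acceptable before training.\n",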
|
126 |
+
"pattern = r\"\"\"(?i) 's|'t|'re|'ve|'m|'ll|'d| ?\\b[\\p{L}\\u0900-\\u0963|\\u0966-\\u097F]+\\b| ?\\b[\\p{L}\\u0C80-\\u0C9E|\\u0CA0-\\u0CFF]+\\b| ?[\\p{N}]+| ?[.,!?;:'\\\"-]| ?[\\u0964-\\u0965]| ?[\\u0C9E-\\u0C9F]| ?[^\\s\\p{L}\\p{N}\\u0900-\\u097F\\u0C80-\\u0CFF]+| \\s+(?!\\S)| \\s+\"\"\"\n",
|
127 |
+
"regex = re.compile(pattern)\n",
|
128 |
+
"\n",
|
129 |
+
"for test in tests:\n",
|
130 |
+
" test_regex(regex, test)"
|
131 |
+
]
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"cell_type": "markdown",
|
135 |
+
"metadata": {},
|
136 |
+
"source": [
|
137 |
+
"### Dataset"
|
138 |
+
]
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"cell_type": "code",
|
142 |
+
"execution_count": null,
|
143 |
+
"metadata": {},
|
144 |
+
"outputs": [],
|
145 |
+
"source": [
|
146 |
+
"from datasets import load_dataset, load_from_disk\n",
|
147 |
+
"import random\n",
|
148 |
+
"import os\n",
|
149 |
+
"\n",
|
150 |
+
"# Load a subset of a dataset and cache it locally\n",
|
151 |
+
"def download_dataset_and_load_subset(dataset_name, name=None, data_dir=None, split=\"train\", num_rows=1000, save_dir=\"/home/yaseen/hf_datasets\"):\n",
|
152 |
+
" # Create a directory path that includes the dataset name and split\n",
|
153 |
+
" dataset_path = f\"{dataset_name.replace('/', '_')}_{name.replace('/', '_') if name else ''}_{data_dir.replace('/', '_') if data_dir else ''}_{split}\"\n",
|
154 |
+
" save_path = os.path.join(save_dir, dataset_path)\n",
|
155 |
+
" \n",
|
156 |
+
" try:\n",
|
157 |
+
" dataset = load_from_disk(save_path)\n",
|
158 |
+
" print(f\"Dataset {dataset_name} loaded from {save_path}..\")\n",
|
159 |
+
" \n",
|
160 |
+
" except FileNotFoundError:\n",
|
161 |
+
" # Load the dataset and cache it in the specified directory\n",
|
162 |
+
" dataset = load_dataset(\n",
|
163 |
+
" dataset_name, \n",
|
164 |
+
" name=name, \n",
|
165 |
+
" data_dir=data_dir, \n",
|
166 |
+
" split=split\n",
|
167 |
+
" )\n",
|
168 |
+
" \n",
|
169 |
+
" # Ensure the base save directory exists\n",
|
170 |
+
" os.makedirs(save_dir, exist_ok=True)\n",
|
171 |
+
" \n",
|
172 |
+
" # Save the dataset to disk\n",
|
173 |
+
" dataset.save_to_disk(save_path)\n",
|
174 |
+
" print(f\"Dataset {dataset_name} saved to {save_path}..\")\n",
|
175 |
+
" \n",
|
176 |
+
" # Select a random subset of the dataset\n",
|
177 |
+
" indices = list(range(len(dataset)))\n",
|
178 |
+
" random.shuffle(indices)\n",
|
179 |
+
" selected_indices = indices[:num_rows]\n",
|
180 |
+
" return dataset.select(selected_indices)\n",
|
181 |
+
"\n",
|
182 |
+
"# Load subsets of each dataset with caching\n",
|
183 |
+
"dataset_en = download_dataset_and_load_subset(\"HuggingFaceFW/fineweb-edu\", name=\"sample-10BT\", num_rows=600)\n",
|
184 |
+
"dataset_hin_deva = download_dataset_and_load_subset(\"ai4bharat/sangraha\", data_dir=\"synthetic/hin_Deva\", num_rows=200)\n",
|
185 |
+
"dataset_kan_knda = download_dataset_and_load_subset(\"ai4bharat/sangraha\", data_dir=\"synthetic/kan_Knda\", num_rows=200)\n",
|
186 |
+
"\n",
|
187 |
+
"# Print the first row of each subset to verify\n",
|
188 |
+
"print(dataset_en[0])\n",
|
189 |
+
"print(dataset_hin_deva[0])\n",
|
190 |
+
"print(dataset_kan_knda[0])"
|
191 |
+
]
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"cell_type": "code",
|
195 |
+
"execution_count": null,
|
196 |
+
"metadata": {},
|
197 |
+
"outputs": [],
|
198 |
+
"source": [
|
199 |
+
"# Concatenate all texts into a single list\n",
|
200 |
+
"all_texts = []\n",
|
201 |
+
"\n",
|
202 |
+
"# Collect texts from each dataset\n",
|
203 |
+
"en_texts = [doc[\"text\"].strip().replace(\"\\n\", \" \") for doc in dataset_en]\n",
|
204 |
+
"hin_deva_texts = [doc[\"text\"].strip().replace(\"\\n\", \" \") for doc in dataset_hin_deva]\n",
|
205 |
+
"kan_knda_texts = [doc[\"text\"].strip().replace(\"\\n\", \" \") for doc in dataset_kan_knda]\n",
|
206 |
+
"\n",
|
207 |
+
"# Add all texts to a single list\n",
|
208 |
+
"all_texts.extend(en_texts)\n",
|
209 |
+
"all_texts.extend(hin_deva_texts)\n",
|
210 |
+
"all_texts.extend(kan_knda_texts)\n",
|
211 |
+
"\n",
|
212 |
+
"# Shuffle the combined texts\n",
|
213 |
+
"random.shuffle(all_texts)\n",
|
214 |
+
"\n",
|
215 |
+
"print(f\"Total number of texts: {len(all_texts)}\")\n",
|
216 |
+
"print(\"\\nFirst few texts after random shuffling:\")\n",
|
217 |
+
"print(all_texts[:3])\n",
|
218 |
+
"\n",
|
219 |
+
"corpus = \"\\n\".join(all_texts)\n",
|
220 |
+
"\n",
|
221 |
+
"# Save the combined texts to a file\n",
|
222 |
+
"with open(\"tok_corpus_small.txt\", \"w\", encoding=\"utf-8\") as file:\n",
|
223 |
+
" file.write(corpus)"
|
224 |
+
]
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"cell_type": "markdown",
|
228 |
+
"metadata": {},
|
229 |
+
"source": [
|
230 |
+
"### Utility functions"
|
231 |
+
]
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"cell_type": "code",
|
235 |
+
"execution_count": 4,
|
236 |
+
"metadata": {},
|
237 |
+
"outputs": [],
|
238 |
+
"source": [
|
239 |
+
"import unicodedata\n",
|
240 |
+
"\n",
|
241 |
+
"def replace_control_characters(s: str) -> str:\n",
|
242 |
+
" # we don't want to print control characters\n",
|
243 |
+
" # which distort the output (e.g. \\n or much worse)\n",
|
244 |
+
" # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python/19016117#19016117\n",
|
245 |
+
" # http://www.unicode.org/reports/tr44/#GC_Values_Table\n",
|
246 |
+
" chars = []\n",
|
247 |
+
" for ch in s:\n",
|
248 |
+
" if unicodedata.category(ch)[0] != \"C\":\n",
|
249 |
+
" chars.append(ch) # this character is ok\n",
|
250 |
+
" else:\n",
|
251 |
+
" chars.append(f\"\\\\u{ord(ch):04x}\") # escape\n",
|
252 |
+
" return \"\".join(chars)\n",
|
253 |
+
"\n",
|
254 |
+
"def render_token(t: bytes) -> str:\n",
|
255 |
+
" # pretty print a token, escaping control characters\n",
|
256 |
+
" s = t.decode('utf-8', errors='replace')\n",
|
257 |
+
" s = replace_control_characters(s)\n",
|
258 |
+
" return s\n",
|
259 |
+
"\n",
|
260 |
+
"# utility function to visualise tokens\n",
|
261 |
+
"def visualise_tokens(token_values: list[bytes], end=\"\\n\") -> None:\n",
|
262 |
+
" background = [f\"\\u001b[48;5;{i}m\" for i in [167, 179, 185, 77, 80, 68, 134]]\n",
|
263 |
+
" # If token boundaries do not occur at unicode character boundaries, it's unclear how best to\n",
|
264 |
+
" # visualise the token. Here, we'll just use the unicode replacement character to represent some\n",
|
265 |
+
" # fraction of a character.\n",
|
266 |
+
" unicode_token_values = [x.decode(\"utf-8\", errors=\"replace\") for x in token_values]\n",
|
267 |
+
"\n",
|
268 |
+
" running_length = 0\n",
|
269 |
+
" last_color = None\n",
|
270 |
+
" for token in unicode_token_values:\n",
|
271 |
+
" color = background[running_length % len(background)]\n",
|
272 |
+
" if color == last_color:\n",
|
273 |
+
" color = background[(running_length + 1) % len(background)]\n",
|
274 |
+
" assert color != last_color\n",
|
275 |
+
" last_color = color\n",
|
276 |
+
" running_length += len(token)\n",
|
277 |
+
" print(color + token, end=\"\")\n",
|
278 |
+
" print(\"\\u001b[0m\", end=end)"
|
279 |
+
]
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"cell_type": "markdown",
|
283 |
+
"metadata": {},
|
284 |
+
"source": [
|
285 |
+
"### Training BPE"
|
286 |
+
]
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"cell_type": "code",
|
290 |
+
"execution_count": 5,
|
291 |
+
"metadata": {},
|
292 |
+
"outputs": [
|
293 |
+
{
|
294 |
+
"name": "stdout",
|
295 |
+
"output_type": "stream",
|
296 |
+
"text": [
|
297 |
+
"Corpus length in characters: 4,182,226\n",
|
298 |
+
"Corpus length in bytes: 6,164,138\n"
|
299 |
+
]
|
300 |
+
}
|
301 |
+
],
|
302 |
+
"source": [
|
303 |
+
"# Read the corpus file\n",
|
304 |
+
"with open(\"tok_corpus_small.txt\", \"r\", encoding=\"utf-8\") as file:\n",
|
305 |
+
" corpus = file.read()\n",
|
306 |
+
"\n",
|
307 |
+
"# Get character and byte lengths\n",
|
308 |
+
"char_length = len(corpus)\n",
|
309 |
+
"byte_length = len(corpus.encode('utf-8'))\n",
|
310 |
+
"\n",
|
311 |
+
"print(f\"Corpus length in characters: {char_length:,}\")\n",
|
312 |
+
"print(f\"Corpus length in bytes: {byte_length:,}\")\n"
|
313 |
+
]
|
314 |
+
},
|
315 |
+
{
|
316 |
+
"cell_type": "code",
|
317 |
+
"execution_count": 6,
|
318 |
+
"metadata": {},
|
319 |
+
"outputs": [
|
320 |
+
{
|
321 |
+
"name": "stderr",
|
322 |
+
"output_type": "stream",
|
323 |
+
"text": [
|
324 |
+
"Training BPE: 100%|██████████| 3000/3000 [56:08<00:00, 1.12s/merge] "
|
325 |
+
]
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"name": "stdout",
|
329 |
+
"output_type": "stream",
|
330 |
+
"text": [
|
331 |
+
"tokens length: 6142083\n",
|
332 |
+
"ids length: 1507403\n",
|
333 |
+
"compression ratio: 4.07X\n"
|
334 |
+
]
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"name": "stderr",
|
338 |
+
"output_type": "stream",
|
339 |
+
"text": [
|
340 |
+
"\n"
|
341 |
+
]
|
342 |
+
}
|
343 |
+
],
|
344 |
+
"source": [
|
345 |
+
"from tqdm import tqdm\n",
|
346 |
+
"\n",
|
347 |
+
"def get_stats(ids, freq):\n",
|
348 |
+
" for pair in zip(ids[:-1], ids[1:]):\n",
|
349 |
+
" freq[pair] = freq.get(pair, 0) + 1\n",
|
350 |
+
"\n",
|
351 |
+
"def merge(ids, pair, idx):\n",
|
352 |
+
" newids = []\n",
|
353 |
+
" i = 0\n",
|
354 |
+
" while i < len(ids):\n",
|
355 |
+
" if i < len(ids) - 1 and ids[i] == pair[0] and ids[i+1] == pair[1]:\n",
|
356 |
+
" newids.append(idx)\n",
|
357 |
+
" i += 2\n",
|
358 |
+
" else:\n",
|
359 |
+
" newids.append(ids[i])\n",
|
360 |
+
" i += 1\n",
|
361 |
+
" return newids\n",
|
362 |
+
"\n",
|
363 |
+
"# ---\n",
|
364 |
+
"# tokens = corpus.encode('utf-8')\n",
|
365 |
+
"vocab_size = 3256 # 256 byte tokens + 3000 merges; the special <|endoftext|> token is added afterwards as id 3256 (3257 tokens in total)\n",
|
366 |
+
"num_merges = vocab_size - 256\n",
|
367 |
+
"# ids = list(tokens) # copy so we don't destroy the original list\n",
|
368 |
+
"\n",
|
369 |
+
"text_chunks = re.findall(regex, corpus)\n",
|
370 |
+
"tokens = [list(ch.encode(\"utf-8\")) for ch in text_chunks]\n",
|
371 |
+
"ids = [list(ch.encode(\"utf-8\")) for ch in text_chunks]\n",
|
372 |
+
"\n",
|
373 |
+
"merges = {} # (int, int) -> int\n",
|
374 |
+
"vocab = {idx: bytes([idx]) for idx in range(256)}\n",
|
375 |
+
"\n",
|
376 |
+
"for i in tqdm(range(num_merges), desc=\"Training BPE\", unit=\"merge\"):\n",
|
377 |
+
" stats = {}\n",
|
378 |
+
" for chunk_ids in ids:\n",
|
379 |
+
" get_stats(chunk_ids, stats)\n",
|
380 |
+
" pair = max(stats, key=stats.get)\n",
|
381 |
+
" idx = 256 + i\n",
|
382 |
+
" ids = [merge(chunk_ids, pair, idx) for chunk_ids in ids]\n",
|
383 |
+
" merges[pair] = idx\n",
|
384 |
+
" vocab[idx] = vocab[pair[0]] + vocab[pair[1]]\n",
|
385 |
+
"\n",
|
386 |
+
"tok_len = 0\n",
|
387 |
+
"for chunk_toks in tokens:\n",
|
388 |
+
" tok_len += len(chunk_toks)\n",
|
389 |
+
"print(\"tokens length:\", tok_len)\n",
|
390 |
+
"\n",
|
391 |
+
"ids_len = 0\n",
|
392 |
+
"for chunk_ids in ids:\n",
|
393 |
+
" ids_len += len(chunk_ids)\n",
|
394 |
+
"print(\"ids length:\", ids_len)\n",
|
395 |
+
"\n",
|
396 |
+
"print(f\"compression ratio: {tok_len / ids_len:.2f}X\")"
|
397 |
+
]
|
398 |
+
},
|
399 |
+
{
|
400 |
+
"cell_type": "code",
|
401 |
+
"execution_count": 8,
|
402 |
+
"metadata": {},
|
403 |
+
"outputs": [],
|
404 |
+
"source": [
|
405 |
+
"def _build_vocab():\n",
|
406 |
+
" vocab = {idx: bytes([idx]) for idx in range(256)}\n",
|
407 |
+
" for (p0, p1), idx in merges.items():\n",
|
408 |
+
" vocab[idx] = vocab[p0] + vocab[p1]\n",
|
409 |
+
" return vocab\n",
|
410 |
+
"\n",
|
411 |
+
"special_tokens = {'<|endoftext|>': len(vocab)}\n",
|
412 |
+
"\n",
|
413 |
+
"def save(file_prefix):\n",
|
414 |
+
" \"\"\"\n",
|
415 |
+
" Saves two files: file_prefix.vocab and file_prefix.model\n",
|
416 |
+
" This is inspired by (but not equivalent to!) sentencepiece's model saving:\n",
|
417 |
+
" - model file is the critical one, intended for load()\n",
|
418 |
+
" - vocab file is just a pretty printed version for human inspection only\n",
|
419 |
+
" \"\"\"\n",
|
420 |
+
" # write the model: to be used in load() later\n",
|
421 |
+
" model_file = file_prefix + \".model\"\n",
|
422 |
+
" with open(model_file, 'w') as f:\n",
|
423 |
+
" # write the version, pattern and merges, that's all that's needed\n",
|
424 |
+
" f.write(\"minbpe v1\\n\")\n",
|
425 |
+
" f.write(f\"{pattern}\\n\")\n",
|
426 |
+
" # write the special tokens, first the number of them, then each one\n",
|
427 |
+
" f.write(f\"{len(special_tokens)}\\n\")\n",
|
428 |
+
" for special, idx in special_tokens.items():\n",
|
429 |
+
" f.write(f\"{special} {idx}\\n\")\n",
|
430 |
+
" # the merges dict\n",
|
431 |
+
" for idx1, idx2 in merges:\n",
|
432 |
+
" f.write(f\"{idx1} {idx2}\\n\")\n",
|
433 |
+
" # write the vocab: for the human to look at\n",
|
434 |
+
" vocab_file = file_prefix + \".vocab\"\n",
|
435 |
+
" inverted_merges = {idx: pair for pair, idx in merges.items()}\n",
|
436 |
+
" with open(vocab_file, \"w\", encoding=\"utf-8\") as f:\n",
|
437 |
+
" for idx, token in vocab.items():\n",
|
438 |
+
" # note: many tokens may be partial utf-8 sequences\n",
|
439 |
+
" # and cannot be decoded into valid strings. Here we're using\n",
|
440 |
+
" # errors='replace' to replace them with the replacement char.\n",
|
441 |
+
" # this also means that we couldn't possibly use .vocab in load()\n",
|
442 |
+
" # because decoding in this way is a lossy operation!\n",
|
443 |
+
" s = render_token(token)\n",
|
444 |
+
" # find the children of this token, if any\n",
|
445 |
+
" if idx in inverted_merges:\n",
|
446 |
+
" # if this token has children, render it nicely as a merge\n",
|
447 |
+
" idx0, idx1 = inverted_merges[idx]\n",
|
448 |
+
" s0 = render_token(vocab[idx0])\n",
|
449 |
+
" s1 = render_token(vocab[idx1])\n",
|
450 |
+
" f.write(f\"[{s0}][{s1}] -> [{s}] {idx}\\n\")\n",
|
451 |
+
" else:\n",
|
452 |
+
"                # otherwise this is a leaf token, just print it\n",
|
453 |
+
" # (this should just be the first 256 tokens, the bytes)\n",
|
454 |
+
" f.write(f\"[{s}] {idx}\\n\")\n",
|
455 |
+
"\n",
|
456 |
+
"save(\"bpe_tok\")\n",
|
457 |
+
"\n",
|
458 |
+
"def load(model_file):\n",
|
459 |
+
" \"\"\"Inverse of save() but only for the model file\"\"\"\n",
|
460 |
+
" assert model_file.endswith(\".model\")\n",
|
461 |
+
" # read the model file\n",
|
462 |
+
" merges = {}\n",
|
463 |
+
" special_tokens = {}\n",
|
464 |
+
" idx = 256\n",
|
465 |
+
" with open(model_file, 'r', encoding=\"utf-8\") as f:\n",
|
466 |
+
" # read the version\n",
|
467 |
+
" version = f.readline().strip()\n",
|
468 |
+
" assert version == \"minbpe v1\"\n",
|
469 |
+
" # read the pattern\n",
|
470 |
+
" pattern = f.readline().strip()\n",
|
471 |
+
" # read the special tokens\n",
|
472 |
+
" num_special = int(f.readline().strip())\n",
|
473 |
+
" for _ in range(num_special):\n",
|
474 |
+
" special, special_idx = f.readline().strip().split()\n",
|
475 |
+
" special_tokens[special] = int(special_idx)\n",
|
476 |
+
" # read the merges\n",
|
477 |
+
" for line in f:\n",
|
478 |
+
" idx1, idx2 = map(int, line.split())\n",
|
479 |
+
" merges[(idx1, idx2)] = idx\n",
|
480 |
+
" idx += 1\n",
|
481 |
+
" vocab = _build_vocab()\n",
|
482 |
+
" return merges, special_tokens, vocab\n",
|
483 |
+
"\n",
|
484 |
+
"merges, special_tokens, vocab = load(\"bpe_tok.model\")"
|
485 |
+
]
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"cell_type": "code",
|
489 |
+
"execution_count": 9,
|
490 |
+
"metadata": {},
|
491 |
+
"outputs": [
|
492 |
+
{
|
493 |
+
"name": "stdout",
|
494 |
+
"output_type": "stream",
|
495 |
+
"text": [
|
496 |
+
"�\n"
|
497 |
+
]
|
498 |
+
}
|
499 |
+
],
|
500 |
+
"source": [
|
501 |
+
"def decode(ids) -> str:\n",
|
502 |
+
" part_bytes = []\n",
|
503 |
+
" for id in ids:\n",
|
504 |
+
" if id in vocab:\n",
|
505 |
+
"            part_bytes.append(vocab[id]) # id can be >= 256 after merging\n",
|
506 |
+
"        elif id in special_tokens.values():\n",
|
507 |
+
"            part_bytes.append(next(s for s, i in special_tokens.items() if i == id).encode(\"utf-8\"))\n",
|
508 |
+
" else:\n",
|
509 |
+
" raise ValueError(f\"id={id} not in vocab or special_tokens\")\n",
|
510 |
+
" text_bytes = b\"\".join(part_bytes)\n",
|
511 |
+
" text = text_bytes.decode(encoding=\"utf-8\", errors=\"replace\")\n",
|
512 |
+
" return text\n",
|
513 |
+
"\n",
|
514 |
+
"print(decode([128]))"
|
515 |
+
]
|
516 |
+
},
|
517 |
+
{
|
518 |
+
"cell_type": "code",
|
519 |
+
"execution_count": 10,
|
520 |
+
"metadata": {},
|
521 |
+
"outputs": [
|
522 |
+
{
|
523 |
+
"data": {
|
524 |
+
"text/plain": [
|
525 |
+
"{(224, 178): 256,\n",
|
526 |
+
" (224, 164): 257,\n",
|
527 |
+
" (224, 179): 258,\n",
|
528 |
+
" (32, 257): 259,\n",
|
529 |
+
" (224, 165): 260,\n",
|
530 |
+
" (32, 256): 261,\n",
|
531 |
+
" (32, 116): 262,\n",
|
532 |
+
" (258, 141): 263,\n",
|
533 |
+
" (263, 256): 264,\n",
|
534 |
+
" (32, 97): 265,\n",
|
535 |
+
" (104, 101): 266,\n",
|
536 |
+
" (105, 110): 267,\n",
|
537 |
+
" (256, 191): 268,\n",
|
538 |
+
" (257, 190): 269,\n",
|
539 |
+
" (114, 101): 270,\n",
|
540 |
+
" (268, 256): 271,\n",
|
541 |
+
" (262, 266): 272,\n",
|
542 |
+
" (32, 111): 273,\n",
|
543 |
+
" (258, 129): 274,\n",
|
544 |
+
" (260, 135): 275,\n",
|
545 |
+
" (101, 114): 276,\n",
|
546 |
+
" (257, 176): 277,\n",
|
547 |
+
" (32, 115): 278,\n",
|
548 |
+
" (256, 190): 279,\n",
|
549 |
+
" (260, 141): 280,\n",
|
550 |
+
" (279, 256): 281,\n",
|
551 |
+
" (97, 116): 282,\n",
|
552 |
+
" (111, 110): 283,\n",
|
553 |
+
" (32, 119): 284,\n",
|
554 |
+
" (110, 100): 285,\n",
|
555 |
+
" (32, 99): 286,\n",
|
556 |
+
" (257, 191): 287,\n",
|
557 |
+
" (101, 110): 288,\n",
|
558 |
+
" (101, 115): 289,\n",
|
559 |
+
" (105, 115): 290,\n",
|
560 |
+
" (280, 257): 291,\n",
|
561 |
+
" (287, 257): 292,\n",
|
562 |
+
" (259, 149): 293,\n",
|
563 |
+
" (258, 134): 294,\n",
|
564 |
+
" (105, 116): 295,\n",
|
565 |
+
" (256, 176): 296,\n",
|
566 |
+
" (111, 114): 297,\n",
|
567 |
+
" (269, 257): 298,\n",
|
568 |
+
" (32, 98): 299,\n",
|
569 |
+
" (32, 102): 300,\n",
|
570 |
+
" (32, 112): 301,\n",
|
571 |
+
" (97, 110): 302,\n",
|
572 |
+
" (101, 100): 303,\n",
|
573 |
+
" (273, 102): 304,\n",
|
574 |
+
" (267, 103): 305,\n",
|
575 |
+
" (97, 108): 306,\n",
|
576 |
+
" (111, 117): 307,\n",
|
577 |
+
" (97, 114): 308,\n",
|
578 |
+
" (257, 130): 309,\n",
|
579 |
+
" (32, 267): 310,\n",
|
580 |
+
" (32, 109): 311,\n",
|
581 |
+
" (265, 285): 312,\n",
|
582 |
+
" (256, 166): 313,\n",
|
583 |
+
" (262, 111): 314,\n",
|
584 |
+
" (260, 128): 315,\n",
|
585 |
+
" (32, 100): 316,\n",
|
586 |
+
" (256, 168): 317,\n",
|
587 |
+
" (105, 99): 318,\n",
|
588 |
+
" (256, 178): 319,\n",
|
589 |
+
" (260, 139): 320,\n",
|
590 |
+
" (256, 181): 321,\n",
|
591 |
+
" (32, 104): 322,\n",
|
592 |
+
" (108, 101): 323,\n",
|
593 |
+
" (97, 115): 324,\n",
|
594 |
+
" (257, 168): 325,\n",
|
595 |
+
" (105, 283): 326,\n",
|
596 |
+
" (256, 164): 327,\n",
|
597 |
+
" (114, 111): 328,\n",
|
598 |
+
" (262, 104): 329,\n",
|
599 |
+
" (256, 130): 330,\n",
|
600 |
+
" (259, 184): 331,\n",
|
601 |
+
" (257, 149): 332,\n",
|
602 |
+
" (288, 116): 333,\n",
|
603 |
+
" (32, 101): 334,\n",
|
604 |
+
" (256, 151): 335,\n",
|
605 |
+
" (32, 108): 336,\n",
|
606 |
+
" (226, 128): 337,\n",
|
607 |
+
" (105, 108): 338,\n",
|
608 |
+
" (32, 84): 339,\n",
|
609 |
+
" (32, 270): 340,\n",
|
610 |
+
" (261, 174): 341,\n",
|
611 |
+
" (111, 109): 342,\n",
|
612 |
+
" (32, 110): 343,\n",
|
613 |
+
" (99, 116): 344,\n",
|
614 |
+
" (259, 174): 345,\n",
|
615 |
+
" (256, 179): 346,\n",
|
616 |
+
" (118, 101): 347,\n",
|
617 |
+
" (115, 116): 348,\n",
|
618 |
+
" (261, 184): 349,\n",
|
619 |
+
" (257, 164): 350,\n",
|
620 |
+
" (264, 168): 351,\n",
|
621 |
+
" (264, 164): 352,\n",
|
622 |
+
" (299, 101): 353,\n",
|
623 |
+
" (257, 178): 354,\n",
|
624 |
+
" (108, 121): 355,\n",
|
625 |
+
" (32, 290): 356,\n",
|
626 |
+
" (32, 103): 357,\n",
|
627 |
+
" (256, 149): 358,\n",
|
628 |
+
" (259, 170): 359,\n",
|
629 |
+
" (264, 176): 360,\n",
|
630 |
+
" (293, 275): 361,\n",
|
631 |
+
" (32, 73): 362,\n",
|
632 |
+
" (280, 277): 363,\n",
|
633 |
+
" (32, 49): 364,\n",
|
634 |
+
" (115, 101): 365,\n",
|
635 |
+
" (111, 116): 366,\n",
|
636 |
+
" (275, 309): 367,\n",
|
637 |
+
" (264, 178): 368,\n",
|
638 |
+
" (257, 184): 369,\n",
|
639 |
+
" (105, 109): 370,\n",
|
640 |
+
" (111, 119): 371,\n",
|
641 |
+
" (105, 100): 372,\n",
|
642 |
+
" (117, 116): 373,\n",
|
643 |
+
" (259, 185): 374,\n",
|
644 |
+
" (111, 108): 375,\n",
|
645 |
+
" (32, 65): 376,\n",
|
646 |
+
" (264, 175): 377,\n",
|
647 |
+
" (99, 101): 378,\n",
|
648 |
+
" (260, 136): 379,\n",
|
649 |
+
" (261, 133): 380,\n",
|
650 |
+
" (256, 161): 381,\n",
|
651 |
+
" (329, 282): 382,\n",
|
652 |
+
" (317, 351): 383,\n",
|
653 |
+
" (258, 135): 384,\n",
|
654 |
+
" (32, 83): 385,\n",
|
655 |
+
" (327, 352): 386,\n",
|
656 |
+
" (101, 116): 387,\n",
|
657 |
+
" (261, 170): 388,\n",
|
658 |
+
" (260, 164): 389,\n",
|
659 |
+
" (32, 117): 390,\n",
|
660 |
+
" (294, 256): 391,\n",
|
661 |
+
" (257, 174): 392,\n",
|
662 |
+
" (273, 110): 393,\n",
|
663 |
+
" (300, 297): 394,\n",
|
664 |
+
" (32, 67): 395,\n",
|
665 |
+
" (99, 104): 396,\n",
|
666 |
+
" (114, 97): 397,\n",
|
667 |
+
" (105, 103): 398,\n",
|
668 |
+
" (278, 116): 399,\n",
|
669 |
+
" (260, 129): 400,\n",
|
670 |
+
" (261, 149): 401,\n",
|
671 |
+
" (282, 326): 402,\n",
|
672 |
+
" (117, 114): 403,\n",
|
673 |
+
" (345, 367): 404,\n",
|
674 |
+
" (105, 114): 405,\n",
|
675 |
+
" (271, 184): 406,\n",
|
676 |
+
" (32, 121): 407,\n",
|
677 |
+
" (383, 274): 408,\n",
|
678 |
+
" (32, 266): 409,\n",
|
679 |
+
" (118, 276): 410,\n",
|
680 |
+
" (339, 266): 411,\n",
|
681 |
+
" (97, 100): 412,\n",
|
682 |
+
" (32, 40): 413,\n",
|
683 |
+
" (277, 291): 414,\n",
|
684 |
+
" (97, 121): 415,\n",
|
685 |
+
" (259, 156): 416,\n",
|
686 |
+
" (281, 176): 417,\n",
|
687 |
+
" (319, 368): 418,\n",
|
688 |
+
" (101, 108): 419,\n",
|
689 |
+
" (281, 151): 420,\n",
|
690 |
+
" (271, 175): 421,\n",
|
691 |
+
" (265, 115): 422,\n",
|
692 |
+
" (258, 138): 423,\n",
|
693 |
+
" (261, 168): 424,\n",
|
694 |
+
" (271, 166): 425,\n",
|
695 |
+
" (97, 109): 426,\n",
|
696 |
+
" (295, 104): 427,\n",
|
697 |
+
" (258, 139): 428,\n",
|
698 |
+
" (256, 184): 429,\n",
|
699 |
+
" (32, 50): 430,\n",
|
700 |
+
" (175, 269): 431,\n",
|
701 |
+
" (257, 170): 432,\n",
|
702 |
+
" (265, 270): 433,\n",
|
703 |
+
" (117, 108): 434,\n",
|
704 |
+
" (260, 130): 435,\n",
|
705 |
+
" (265, 110): 436,\n",
|
706 |
+
" (284, 427): 437,\n",
|
707 |
+
" (32, 295): 438,\n",
|
708 |
+
" (259, 181): 439,\n",
|
709 |
+
" (284, 104): 440,\n",
|
710 |
+
" (257, 185): 441,\n",
|
711 |
+
" (32, 77): 442,\n",
|
712 |
+
" (259, 172): 443,\n",
|
713 |
+
" (407, 307): 444,\n",
|
714 |
+
" (259, 143): 445,\n",
|
715 |
+
" (335, 346): 446,\n",
|
716 |
+
" (261, 181): 447,\n",
|
717 |
+
" (105, 102): 448,\n",
|
718 |
+
" (116, 276): 449,\n",
|
719 |
+
" (259, 168): 450,\n",
|
720 |
+
" (301, 328): 451,\n",
|
721 |
+
" (261, 185): 452,\n",
|
722 |
+
" (286, 283): 453,\n",
|
723 |
+
" (32, 80): 454,\n",
|
724 |
+
" (309, 257): 455,\n",
|
725 |
+
" (265, 108): 456,\n",
|
726 |
+
" (269, 277): 457,\n",
|
727 |
+
" (259, 133): 458,\n",
|
728 |
+
" (273, 114): 459,\n",
|
729 |
+
" (418, 268): 460,\n",
|
730 |
+
" (256, 159): 461,\n",
|
731 |
+
" (261, 172): 462,\n",
|
732 |
+
" (296, 274): 463,\n",
|
733 |
+
" (258, 128): 464,\n",
|
734 |
+
" (266, 114): 465,\n",
|
735 |
+
" (374, 379): 466,\n",
|
736 |
+
" (320, 309): 467,\n",
|
737 |
+
" (111, 100): 468,\n",
|
738 |
+
" (276, 115): 469,\n",
|
739 |
+
" (32, 87): 470,\n",
|
740 |
+
" (32, 66): 471,\n",
|
741 |
+
" (386, 274): 472,\n",
|
742 |
+
" (32, 72): 473,\n",
|
743 |
+
" (391, 175): 474,\n",
|
744 |
+
" (259, 166): 475,\n",
|
745 |
+
" (259, 148): 476,\n",
|
746 |
+
" (101, 109): 477,\n",
|
747 |
+
" (259, 178): 478,\n",
|
748 |
+
" (256, 174): 479,\n",
|
749 |
+
" (337, 153): 480,\n",
|
750 |
+
" (284, 101): 481,\n",
|
751 |
+
" (97, 285): 482,\n",
|
752 |
+
" (112, 101): 483,\n",
|
753 |
+
" (341, 472): 484,\n",
|
754 |
+
" (296, 264): 485,\n",
|
755 |
+
" (258, 130): 486,\n",
|
756 |
+
" (476, 277): 487,\n",
|
757 |
+
" (271, 164): 488,\n",
|
758 |
+
" (330, 313): 489,\n",
|
759 |
+
" (97, 103): 490,\n",
|
760 |
+
" (259, 176): 491,\n",
|
761 |
+
" (284, 324): 492,\n",
|
762 |
+
" (259, 151): 493,\n",
|
763 |
+
" (334, 120): 494,\n",
|
764 |
+
" (108, 100): 495,\n",
|
765 |
+
" (271, 149): 496,\n",
|
766 |
+
" (289, 115): 497,\n",
|
767 |
+
" (116, 104): 498,\n",
|
768 |
+
" (261, 134): 499,\n",
|
769 |
+
" (111, 112): 500,\n",
|
770 |
+
" (107, 101): 501,\n",
|
771 |
+
" (338, 108): 502,\n",
|
772 |
+
" (257, 151): 503,\n",
|
773 |
+
" (32, 69): 504,\n",
|
774 |
+
" (114, 105): 505,\n",
|
775 |
+
" (291, 175): 506,\n",
|
776 |
+
" (32, 337): 507,\n",
|
777 |
+
" (278, 117): 508,\n",
|
778 |
+
" (286, 342): 509,\n",
|
779 |
+
" (292, 149): 510,\n",
|
780 |
+
" (275, 257): 511,\n",
|
781 |
+
" (111, 270): 512,\n",
|
782 |
+
" (290, 116): 513,\n",
|
783 |
+
" (32, 118): 514,\n",
|
784 |
+
" (256, 170): 515,\n",
|
785 |
+
" (322, 97): 516,\n",
|
786 |
+
" (289, 116): 517,\n",
|
787 |
+
" (265, 116): 518,\n",
|
788 |
+
" (117, 109): 519,\n",
|
789 |
+
" (298, 168): 520,\n",
|
790 |
+
" (48, 48): 521,\n",
|
791 |
+
" (299, 121): 522,\n",
|
792 |
+
" (32, 34): 523,\n",
|
793 |
+
" (282, 101): 524,\n",
|
794 |
+
" (97, 98): 525,\n",
|
795 |
+
" (264, 159): 526,\n",
|
796 |
+
" (271, 151): 527,\n",
|
797 |
+
" (117, 115): 528,\n",
|
798 |
+
" (316, 101): 529,\n",
|
799 |
+
" (97, 267): 530,\n",
|
800 |
+
" (259, 165): 531,\n",
|
801 |
+
" (117, 270): 532,\n",
|
802 |
+
" (105, 347): 533,\n",
|
803 |
+
" (364, 57): 534,\n",
|
804 |
+
" (117, 110): 535,\n",
|
805 |
+
" (32, 68): 536,\n",
|
806 |
+
" (105, 289): 537,\n",
|
807 |
+
" (398, 104): 538,\n",
|
808 |
+
" (256, 172): 539,\n",
|
809 |
+
" (97, 99): 540,\n",
|
810 |
+
" (328, 109): 541,\n",
|
811 |
+
" (302, 116): 542,\n",
|
812 |
+
" (278, 101): 543,\n",
|
813 |
+
" (32, 76): 544,\n",
|
814 |
+
" (32, 82): 545,\n",
|
815 |
+
" (257, 181): 546,\n",
|
816 |
+
" (110, 116): 547,\n",
|
817 |
+
" (359, 363): 548,\n",
|
818 |
+
" (274, 321): 549,\n",
|
819 |
+
" (259, 137): 550,\n",
|
820 |
+
" (293, 320): 551,\n",
|
821 |
+
" (286, 104): 552,\n",
|
822 |
+
" (295, 121): 553,\n",
|
823 |
+
" (286, 302): 554,\n",
|
824 |
+
" (261, 142): 555,\n",
|
825 |
+
" (111, 115): 556,\n",
|
826 |
+
" (343, 366): 557,\n",
|
827 |
+
" (261, 176): 558,\n",
|
828 |
+
" (388, 360): 559,\n",
|
829 |
+
" (109, 333): 560,\n",
|
830 |
+
" (113, 117): 561,\n",
|
831 |
+
" (32, 70): 562,\n",
|
832 |
+
" (325, 275): 563,\n",
|
833 |
+
" (278, 104): 564,\n",
|
834 |
+
" (32, 78): 565,\n",
|
835 |
+
" (292, 431): 566,\n",
|
836 |
+
" (105, 118): 567,\n",
|
837 |
+
" (297, 116): 568,\n",
|
838 |
+
" (331, 275): 569,\n",
|
839 |
+
" (300, 541): 570,\n",
|
840 |
+
" (343, 101): 571,\n",
|
841 |
+
" (281, 168): 572,\n",
|
842 |
+
" (271, 168): 573,\n",
|
843 |
+
" (256, 175): 574,\n",
|
844 |
+
" (293, 315): 575,\n",
|
845 |
+
" (111, 99): 576,\n",
|
846 |
+
" (292, 164): 577,\n",
|
847 |
+
" (112, 112): 578,\n",
|
848 |
+
" (271, 176): 579,\n",
|
849 |
+
" (400, 257): 580,\n",
|
850 |
+
" (258, 136): 581,\n",
|
851 |
+
" (516, 347): 582,\n",
|
852 |
+
" (445, 332): 583,\n",
|
853 |
+
" (117, 100): 584,\n",
|
854 |
+
" (32, 114): 585,\n",
|
855 |
+
" (257, 175): 586,\n",
|
856 |
+
" (32, 71): 587,\n",
|
857 |
+
" (358, 264): 588,\n",
|
858 |
+
" (257, 172): 589,\n",
|
859 |
+
" (101, 344): 590,\n",
|
860 |
+
" (256, 163): 591,\n",
|
861 |
+
" (307, 495): 592,\n",
|
862 |
+
" (32, 45): 593,\n",
|
863 |
+
" (308, 116): 594,\n",
|
864 |
+
" (270, 115): 595,\n",
|
865 |
+
" (256, 185): 596,\n",
|
866 |
+
" (271, 130): 597,\n",
|
867 |
+
" (419, 108): 598,\n",
|
868 |
+
" (32, 323): 599,\n",
|
869 |
+
" (538, 116): 600,\n",
|
870 |
+
" (317, 263): 601,\n",
|
871 |
+
" (32, 107): 602,\n",
|
872 |
+
" (261, 135): 603,\n",
|
873 |
+
" (257, 188): 604,\n",
|
874 |
+
" (292, 175): 605,\n",
|
875 |
+
" (329, 290): 606,\n",
|
876 |
+
" (318, 306): 607,\n",
|
877 |
+
" (105, 306): 608,\n",
|
878 |
+
" (337, 157): 609,\n",
|
879 |
+
" (264, 166): 610,\n",
|
880 |
+
" (257, 159): 611,\n",
|
881 |
+
" (507, 156): 612,\n",
|
882 |
+
" (493, 257): 613,\n",
|
883 |
+
" (103, 101): 614,\n",
|
884 |
+
" (281, 166): 615,\n",
|
885 |
+
" (103, 104): 616,\n",
|
886 |
+
" (380, 321): 617,\n",
|
887 |
+
" (261, 164): 618,\n",
|
888 |
+
" (331, 291): 619,\n",
|
889 |
+
" (293, 269): 620,\n",
|
890 |
+
" (101, 270): 621,\n",
|
891 |
+
" (284, 297): 622,\n",
|
892 |
+
" (342, 101): 623,\n",
|
893 |
+
" (318, 104): 624,\n",
|
894 |
+
" (265, 98): 625,\n",
|
895 |
+
" (257, 161): 626,\n",
|
896 |
+
" (114, 307): 627,\n",
|
897 |
+
" (281, 175): 628,\n",
|
898 |
+
" (32, 79): 629,\n",
|
899 |
+
" (267, 101): 630,\n",
|
900 |
+
" (293, 277): 631,\n",
|
901 |
+
" (256, 182): 632,\n",
|
902 |
+
" (257, 166): 633,\n",
|
903 |
+
" (111, 103): 634,\n",
|
904 |
+
" (97, 112): 635,\n",
|
905 |
+
" (306, 108): 636,\n",
|
906 |
+
" (260, 137): 637,\n",
|
907 |
+
" (308, 100): 638,\n",
|
908 |
+
" (298, 178): 639,\n",
|
909 |
+
" (259, 134): 640,\n",
|
910 |
+
" (310, 116): 641,\n",
|
911 |
+
" (292, 143): 642,\n",
|
912 |
+
" (291, 164): 643,\n",
|
913 |
+
" (307, 116): 644,\n",
|
914 |
+
" (307, 114): 645,\n",
|
915 |
+
" (390, 115): 646,\n",
|
916 |
+
" (301, 108): 647,\n",
|
917 |
+
" (117, 348): 648,\n",
|
918 |
+
" (362, 110): 649,\n",
|
919 |
+
" (272, 405): 650,\n",
|
920 |
+
" (261, 151): 651,\n",
|
921 |
+
" (97, 107): 652,\n",
|
922 |
+
" (292, 178): 653,\n",
|
923 |
+
" (105, 112): 654,\n",
|
924 |
+
" (261, 156): 655,\n",
|
925 |
+
" (420, 268): 656,\n",
|
926 |
+
" (330, 327): 657,\n",
|
927 |
+
" (272, 121): 658,\n",
|
928 |
+
" (111, 348): 659,\n",
|
929 |
+
" (274, 386): 660,\n",
|
930 |
+
" (284, 266): 661,\n",
|
931 |
+
" (116, 465): 662,\n",
|
932 |
+
" (281, 130): 663,\n",
|
933 |
+
" (282, 303): 664,\n",
|
934 |
+
" (257, 156): 665,\n",
|
935 |
+
" (259, 164): 666,\n",
|
936 |
+
" (259, 135): 667,\n",
|
937 |
+
" (32, 370): 668,\n",
|
938 |
+
" (430, 48): 669,\n",
|
939 |
+
" (324, 116): 670,\n",
|
940 |
+
" (264, 151): 671,\n",
|
941 |
+
" (259, 182): 672,\n",
|
942 |
+
" (292, 184): 673,\n",
|
943 |
+
" (32, 85): 674,\n",
|
944 |
+
" (316, 111): 675,\n",
|
945 |
+
" (32, 74): 676,\n",
|
946 |
+
" (488, 274): 677,\n",
|
947 |
+
" (32, 288): 678,\n",
|
948 |
+
" (281, 178): 679,\n",
|
949 |
+
" (339, 104): 680,\n",
|
950 |
+
" (264, 181): 681,\n",
|
951 |
+
" (256, 156): 682,\n",
|
952 |
+
" (321, 408): 683,\n",
|
953 |
+
" (97, 270): 684,\n",
|
954 |
+
" (307, 115): 685,\n",
|
955 |
+
" (531, 269): 686,\n",
|
956 |
+
" (444, 114): 687,\n",
|
957 |
+
" (456, 108): 688,\n",
|
958 |
+
" (112, 116): 689,\n",
|
959 |
+
" (97, 344): 690,\n",
|
960 |
+
" (326, 115): 691,\n",
|
961 |
+
" (291, 181): 692,\n",
|
962 |
+
" (311, 101): 693,\n",
|
963 |
+
" (112, 108): 694,\n",
|
964 |
+
" (102, 102): 695,\n",
|
965 |
+
" (440, 624): 696,\n",
|
966 |
+
" (264, 184): 697,\n",
|
967 |
+
" (274, 313): 698,\n",
|
968 |
+
" (283, 103): 699,\n",
|
969 |
+
" (112, 276): 700,\n",
|
970 |
+
" (302, 115): 701,\n",
|
971 |
+
" (101, 308): 702,\n",
|
972 |
+
" (298, 174): 703,\n",
|
973 |
+
" (322, 290): 704,\n",
|
974 |
+
" (284, 502): 705,\n",
|
975 |
+
" (97, 501): 706,\n",
|
976 |
+
" (299, 373): 707,\n",
|
977 |
+
" (359, 277): 708,\n",
|
978 |
+
" (261, 146): 709,\n",
|
979 |
+
" (261, 166): 710,\n",
|
980 |
+
" (271, 178): 711,\n",
|
981 |
+
" (357, 111): 712,\n",
|
982 |
+
" (322, 324): 713,\n",
|
983 |
+
" (490, 101): 714,\n",
|
984 |
+
" (372, 101): 715,\n",
|
985 |
+
" (311, 512): 716,\n",
|
986 |
+
" (99, 108): 717,\n",
|
987 |
+
" (298, 164): 718,\n",
|
988 |
+
" (527, 294): 719,\n",
|
989 |
+
" (330, 381): 720,\n",
|
990 |
+
" (350, 269): 721,\n",
|
991 |
+
" (478, 642): 722,\n",
|
992 |
+
" (100, 117): 723,\n",
|
993 |
+
" (481, 270): 724,\n",
|
994 |
+
" (257, 154): 725,\n",
|
995 |
+
" (267, 100): 726,\n",
|
996 |
+
" (291, 159): 727,\n",
|
997 |
+
" (107, 115): 728,\n",
|
998 |
+
" (311, 302): 729,\n",
|
999 |
+
" (509, 112): 730,\n",
|
1000 |
+
" (296, 263): 731,\n",
|
1001 |
+
" (292, 168): 732,\n",
|
1002 |
+
" (105, 98): 733,\n",
|
1003 |
+
" (340, 115): 734,\n",
|
1004 |
+
" (97, 118): 735,\n",
|
1005 |
+
" (293, 566): 736,\n",
|
1006 |
+
" (114, 117): 737,\n",
|
1007 |
+
" (406, 319): 738,\n",
|
1008 |
+
" (425, 610): 739,\n",
|
1009 |
+
" (453, 116): 740,\n",
|
1010 |
+
" (525, 323): 741,\n",
|
1011 |
+
" (273, 373): 742,\n",
|
1012 |
+
" (333, 115): 743,\n",
|
1013 |
+
" (32, 106): 744,\n",
|
1014 |
+
" (264, 174): 745,\n",
|
1015 |
+
" (308, 121): 746,\n",
|
1016 |
+
" (114, 121): 747,\n",
|
1017 |
+
" (257, 163): 748,\n",
|
1018 |
+
" (105, 122): 749,\n",
|
1019 |
+
" (321, 274): 750,\n",
|
1020 |
+
" (322, 412): 751,\n",
|
1021 |
+
" (613, 431): 752,\n",
|
1022 |
+
" (286, 108): 753,\n",
|
1023 |
+
" (291, 183): 754,\n",
|
1024 |
+
" (278, 111): 755,\n",
|
1025 |
+
" (265, 100): 756,\n",
|
1026 |
+
" (409, 114): 757,\n",
|
1027 |
+
" (105, 101): 758,\n",
|
1028 |
+
" (259, 175): 759,\n",
|
1029 |
+
" (297, 109): 760,\n",
|
1030 |
+
" (115, 111): 761,\n",
|
1031 |
+
" (99, 99): 762,\n",
|
1032 |
+
" (347, 108): 763,\n",
|
1033 |
+
" (597, 313): 764,\n",
|
1034 |
+
" (105, 97): 765,\n",
|
1035 |
+
" (324, 115): 766,\n",
|
1036 |
+
" (97, 396): 767,\n",
|
1037 |
+
" (450, 275): 768,\n",
|
1038 |
+
" (393, 101): 769,\n",
|
1039 |
+
" (489, 274): 770,\n",
|
1040 |
+
" (338, 100): 771,\n",
|
1041 |
+
" (259, 150): 772,\n",
|
1042 |
+
" (362, 116): 773,\n",
|
1043 |
+
" (32, 51): 774,\n",
|
1044 |
+
" (259, 173): 775,\n",
|
1045 |
+
" (259, 154): 776,\n",
|
1046 |
+
" (434, 116): 777,\n",
|
1047 |
+
" (261, 137): 778,\n",
|
1048 |
+
" (105, 270): 779,\n",
|
1049 |
+
" (256, 167): 780,\n",
|
1050 |
+
" (281, 161): 781,\n",
|
1051 |
+
" (281, 184): 782,\n",
|
1052 |
+
" (111, 111): 783,\n",
|
1053 |
+
" (282, 276): 784,\n",
|
1054 |
+
" (261, 175): 785,\n",
|
1055 |
+
" (273, 662): 786,\n",
|
1056 |
+
" (440, 111): 787,\n",
|
1057 |
+
" (302, 378): 788,\n",
|
1058 |
+
" (259, 171): 789,\n",
|
1059 |
+
" (262, 101): 790,\n",
|
1060 |
+
" (283, 101): 791,\n",
|
1061 |
+
" (288, 378): 792,\n",
|
1062 |
+
" (275, 354): 793,\n",
|
1063 |
+
" (108, 371): 794,\n",
|
1064 |
+
" (430, 521): 795,\n",
|
1065 |
+
" (316, 290): 796,\n",
|
1066 |
+
" (402, 115): 797,\n",
|
1067 |
+
" (425, 294): 798,\n",
|
1068 |
+
" (97, 378): 799,\n",
|
1069 |
+
" (257, 182): 800,\n",
|
1070 |
+
" (98, 276): 801,\n",
|
1071 |
+
" (298, 184): 802,\n",
|
1072 |
+
" (426, 101): 803,\n",
|
1073 |
+
" (313, 460): 804,\n",
|
1074 |
+
" (281, 181): 805,\n",
|
1075 |
+
" (112, 568): 806,\n",
|
1076 |
+
" (117, 306): 807,\n",
|
1077 |
+
" (364, 56): 808,\n",
|
1078 |
+
" (669, 49): 809,\n",
|
1079 |
+
" (270, 282): 810,\n",
|
1080 |
+
" (290, 104): 811,\n",
|
1081 |
+
" (110, 371): 812,\n",
|
1082 |
+
" (272, 109): 813,\n",
|
1083 |
+
" (262, 370): 814,\n",
|
1084 |
+
" (264, 170): 815,\n",
|
1085 |
+
" (456, 761): 816,\n",
|
1086 |
+
" (256, 173): 817,\n",
|
1087 |
+
" (261, 182): 818,\n",
|
1088 |
+
" (278, 97): 819,\n",
|
1089 |
+
" (302, 103): 820,\n",
|
1090 |
+
" (291, 184): 821,\n",
|
1091 |
+
" (410, 121): 822,\n",
|
1092 |
+
" (588, 149): 823,\n",
|
1093 |
+
" (550, 325): 824,\n",
|
1094 |
+
" (390, 110): 825,\n",
|
1095 |
+
" (349, 330): 826,\n",
|
1096 |
+
" (264, 149): 827,\n",
|
1097 |
+
" (270, 101): 828,\n",
|
1098 |
+
" (344, 326): 829,\n",
|
1099 |
+
" (264, 165): 830,\n",
|
1100 |
+
" (446, 274): 831,\n",
|
1101 |
+
" (318, 101): 832,\n",
|
1102 |
+
" (261, 154): 833,\n",
|
1103 |
+
" (390, 112): 834,\n",
|
1104 |
+
" (385, 116): 835,\n",
|
1105 |
+
" (301, 276): 836,\n",
|
1106 |
+
" (301, 270): 837,\n",
|
1107 |
+
" (406, 425): 838,\n",
|
1108 |
+
" (398, 110): 839,\n",
|
1109 |
+
" (257, 171): 840,\n",
|
1110 |
+
" (331, 298): 841,\n",
|
1111 |
+
" (298, 166): 842,\n",
|
1112 |
+
" (423, 489): 843,\n",
|
1113 |
+
" (334, 118): 844,\n",
|
1114 |
+
" (625, 644): 845,\n",
|
1115 |
+
" (256, 154): 846,\n",
|
1116 |
+
" (272, 270): 847,\n",
|
1117 |
+
" (281, 174): 848,\n",
|
1118 |
+
" (278, 121): 849,\n",
|
1119 |
+
" (301, 594): 850,\n",
|
1120 |
+
" (540, 107): 851,\n",
|
1121 |
+
" (265, 114): 852,\n",
|
1122 |
+
" (446, 408): 853,\n",
|
1123 |
+
" (99, 289): 854,\n",
|
1124 |
+
" (357, 288): 855,\n",
|
1125 |
+
" (267, 116): 856,\n",
|
1126 |
+
" (296, 294): 857,\n",
|
1127 |
+
" (257, 167): 858,\n",
|
1128 |
+
" (105, 501): 859,\n",
|
1129 |
+
" (439, 292): 860,\n",
|
1130 |
+
" (307, 285): 861,\n",
|
1131 |
+
" (259, 161): 862,\n",
|
1132 |
+
" (680, 290): 863,\n",
|
1133 |
+
" (509, 109): 864,\n",
|
1134 |
+
" (595, 115): 865,\n",
|
1135 |
+
" (295, 101): 866,\n",
|
1136 |
+
" (111, 365): 867,\n",
|
1137 |
+
" (661, 110): 868,\n",
|
1138 |
+
" (105, 323): 869,\n",
|
1139 |
+
" (97, 117): 870,\n",
|
1140 |
+
" (307, 547): 871,\n",
|
1141 |
+
" (260, 140): 872,\n",
|
1142 |
+
" (325, 269): 873,\n",
|
1143 |
+
" (641, 111): 874,\n",
|
1144 |
+
" (278, 99): 875,\n",
|
1145 |
+
" (466, 309): 876,\n",
|
1146 |
+
" (259, 159): 877,\n",
|
1147 |
+
" (307, 616): 878,\n",
|
1148 |
+
" (331, 455): 879,\n",
|
1149 |
+
" (278, 483): 880,\n",
|
1150 |
+
" (278, 266): 881,\n",
|
1151 |
+
" (111, 468): 882,\n",
|
1152 |
+
" (375, 100): 883,\n",
|
1153 |
+
" (435, 432): 884,\n",
|
1154 |
+
" (331, 392): 885,\n",
|
1155 |
+
" (261, 178): 886,\n",
|
1156 |
+
" (399, 584): 887,\n",
|
1157 |
+
" (415, 115): 888,\n",
|
1158 |
+
" (278, 623): 889,\n",
|
1159 |
+
" (329, 302): 890,\n",
|
1160 |
+
" (97, 338): 891,\n",
|
1161 |
+
" (324, 101): 892,\n",
|
1162 |
+
" (285, 276): 893,\n",
|
1163 |
+
" (301, 101): 894,\n",
|
1164 |
+
" (603, 313): 895,\n",
|
1165 |
+
" (271, 181): 896,\n",
|
1166 |
+
" (527, 346): 897,\n",
|
1167 |
+
" (448, 102): 898,\n",
|
1168 |
+
" (32, 89): 899,\n",
|
1169 |
+
" (291, 178): 900,\n",
|
1170 |
+
" (117, 101): 901,\n",
|
1171 |
+
" (264, 154): 902,\n",
|
1172 |
+
" (261, 161): 903,\n",
|
1173 |
+
" (319, 263): 904,\n",
|
1174 |
+
" (322, 371): 905,\n",
|
1175 |
+
" (278, 112): 906,\n",
|
1176 |
+
" (101, 119): 907,\n",
|
1177 |
+
" (288, 115): 908,\n",
|
1178 |
+
" (117, 98): 909,\n",
|
1179 |
+
" (343, 111): 910,\n",
|
1180 |
+
" (353, 288): 911,\n",
|
1181 |
+
" (262, 119): 912,\n",
|
1182 |
+
" (32, 75): 913,\n",
|
1183 |
+
" (602, 812): 914,\n",
|
1184 |
+
" (295, 326): 915,\n",
|
1185 |
+
" (667, 369): 916,\n",
|
1186 |
+
" (353, 99): 917,\n",
|
1187 |
+
" (41, 46): 918,\n",
|
1188 |
+
" (330, 539): 919,\n",
|
1189 |
+
" (814, 101): 920,\n",
|
1190 |
+
" (407, 702): 921,\n",
|
1191 |
+
" (298, 130): 922,\n",
|
1192 |
+
" (32, 52): 923,\n",
|
1193 |
+
" (107, 305): 924,\n",
|
1194 |
+
" (297, 121): 925,\n",
|
1195 |
+
" (101, 112): 926,\n",
|
1196 |
+
" (32, 448): 927,\n",
|
1197 |
+
" (273, 410): 928,\n",
|
1198 |
+
" (298, 135): 929,\n",
|
1199 |
+
" (297, 100): 930,\n",
|
1200 |
+
" (841, 165): 931,\n",
|
1201 |
+
" (32, 397): 932,\n",
|
1202 |
+
" (335, 294): 933,\n",
|
1203 |
+
" (374, 320): 934,\n",
|
1204 |
+
" (310, 102): 935,\n",
|
1205 |
+
" (297, 115): 936,\n",
|
1206 |
+
" (491, 884): 937,\n",
|
1207 |
+
" (32, 561): 938,\n",
|
1208 |
+
" (341, 781): 939,\n",
|
1209 |
+
" (300, 405): 940,\n",
|
1210 |
+
" (628, 677): 941,\n",
|
1211 |
+
" (281, 149): 942,\n",
|
1212 |
+
" (298, 170): 943,\n",
|
1213 |
+
" (316, 898): 944,\n",
|
1214 |
+
" (348, 477): 945,\n",
|
1215 |
+
" (105, 302): 946,\n",
|
1216 |
+
" (395, 104): 947,\n",
|
1217 |
+
" (500, 323): 948,\n",
|
1218 |
+
" (261, 136): 949,\n",
|
1219 |
+
" (265, 103): 950,\n",
|
1220 |
+
" (350, 363): 951,\n",
|
1221 |
+
" (116, 101): 952,\n",
|
1222 |
+
" (336, 859): 953,\n",
|
1223 |
+
" (119, 110): 954,\n",
|
1224 |
+
" (112, 323): 955,\n",
|
1225 |
+
" (353, 116): 956,\n",
|
1226 |
+
" (284, 592): 957,\n",
|
1227 |
+
" (322, 370): 958,\n",
|
1228 |
+
" (298, 156): 959,\n",
|
1229 |
+
" (283, 100): 960,\n",
|
1230 |
+
" (110, 305): 961,\n",
|
1231 |
+
" (265, 578): 962,\n",
|
1232 |
+
" (311, 415): 963,\n",
|
1233 |
+
" (109, 276): 964,\n",
|
1234 |
+
" (384, 296): 965,\n",
|
1235 |
+
" (108, 108): 966,\n",
|
1236 |
+
" (311, 121): 967,\n",
|
1237 |
+
" (627, 616): 968,\n",
|
1238 |
+
" (32, 328): 969,\n",
|
1239 |
+
" (416, 320): 970,\n",
|
1240 |
+
" (258, 140): 971,\n",
|
1241 |
+
" (318, 107): 972,\n",
|
1242 |
+
" (102, 449): 973,\n",
|
1243 |
+
" (105, 685): 974,\n",
|
1244 |
+
" (436, 121): 975,\n",
|
1245 |
+
" (313, 294): 976,\n",
|
1246 |
+
" (626, 604): 977,\n",
|
1247 |
+
" (440, 282): 978,\n",
|
1248 |
+
" (99, 270): 979,\n",
|
1249 |
+
" (588, 183): 980,\n",
|
1250 |
+
" (261, 159): 981,\n",
|
1251 |
+
" (458, 432): 982,\n",
|
1252 |
+
" (321, 656): 983,\n",
|
1253 |
+
" (438, 115): 984,\n",
|
1254 |
+
" (276, 118): 985,\n",
|
1255 |
+
" (352, 274): 986,\n",
|
1256 |
+
" (400, 277): 987,\n",
|
1257 |
+
" (319, 274): 988,\n",
|
1258 |
+
" (111, 347): 989,\n",
|
1259 |
+
" (310, 100): 990,\n",
|
1260 |
+
" (320, 503): 991,\n",
|
1261 |
+
" (273, 403): 992,\n",
|
1262 |
+
" (277, 315): 993,\n",
|
1263 |
+
" (390, 365): 994,\n",
|
1264 |
+
" (304, 102): 995,\n",
|
1265 |
+
" (41, 44): 996,\n",
|
1266 |
+
" (283, 115): 997,\n",
|
1267 |
+
" (272, 365): 998,\n",
|
1268 |
+
" (340, 99): 999,\n",
|
1269 |
+
" (617, 463): 1000,\n",
|
1270 |
+
" (291, 185): 1001,\n",
|
1271 |
+
" (305, 115): 1002,\n",
|
1272 |
+
" (32, 86): 1003,\n",
|
1273 |
+
" (894, 948): 1004,\n",
|
1274 |
+
" (270, 110): 1005,\n",
|
1275 |
+
" (265, 344): 1006,\n",
|
1276 |
+
" (256, 183): 1007,\n",
|
1277 |
+
" (375, 634): 1008,\n",
|
1278 |
+
" (114, 295): 1009,\n",
|
1279 |
+
" (390, 893): 1010,\n",
|
1280 |
+
" (461, 526): 1011,\n",
|
1281 |
+
" (376, 115): 1012,\n",
|
1282 |
+
" (281, 156): 1013,\n",
|
1283 |
+
" (112, 104): 1014,\n",
|
1284 |
+
" (119, 101): 1015,\n",
|
1285 |
+
" (316, 289): 1016,\n",
|
1286 |
+
" (271, 174): 1017,\n",
|
1287 |
+
" (384, 632): 1018,\n",
|
1288 |
+
" (912, 111): 1019,\n",
|
1289 |
+
" (261, 173): 1020,\n",
|
1290 |
+
" (32, 53): 1021,\n",
|
1291 |
+
" (548, 350): 1022,\n",
|
1292 |
+
" (306, 355): 1023,\n",
|
1293 |
+
" (121, 115): 1024,\n",
|
1294 |
+
" (108, 289): 1025,\n",
|
1295 |
+
" (365, 108): 1026,\n",
|
1296 |
+
" (262, 397): 1027,\n",
|
1297 |
+
" (391, 184): 1028,\n",
|
1298 |
+
" (409, 108): 1029,\n",
|
1299 |
+
" (298, 175): 1030,\n",
|
1300 |
+
" (290, 115): 1031,\n",
|
1301 |
+
" (106, 590): 1032,\n",
|
1302 |
+
" (849, 945): 1033,\n",
|
1303 |
+
" (286, 111): 1034,\n",
|
1304 |
+
" (109, 115): 1035,\n",
|
1305 |
+
" (269, 414): 1036,\n",
|
1306 |
+
" (571, 119): 1037,\n",
|
1307 |
+
" (330, 461): 1038,\n",
|
1308 |
+
" (323, 344): 1039,\n",
|
1309 |
+
" (318, 116): 1040,\n",
|
1310 |
+
" (473, 101): 1041,\n",
|
1311 |
+
" (332, 754): 1042,\n",
|
1312 |
+
" (340, 108): 1043,\n",
|
1313 |
+
" (1029, 112): 1044,\n",
|
1314 |
+
" (295, 537): 1045,\n",
|
1315 |
+
" (870, 365): 1046,\n",
|
1316 |
+
" (291, 149): 1047,\n",
|
1317 |
+
" (308, 107): 1048,\n",
|
1318 |
+
" (282, 289): 1049,\n",
|
1319 |
+
" (393, 355): 1050,\n",
|
1320 |
+
" (423, 720): 1051,\n",
|
1321 |
+
" (294, 319): 1052,\n",
|
1322 |
+
" (410, 115): 1053,\n",
|
1323 |
+
" (311, 659): 1054,\n",
|
1324 |
+
" (257, 173): 1055,\n",
|
1325 |
+
" (257, 136): 1056,\n",
|
1326 |
+
" (290, 101): 1057,\n",
|
1327 |
+
" (717, 584): 1058,\n",
|
1328 |
+
" (552, 771): 1059,\n",
|
1329 |
+
" (315, 586): 1060,\n",
|
1330 |
+
" (940, 348): 1061,\n",
|
1331 |
+
" (453, 115): 1062,\n",
|
1332 |
+
" (455, 161): 1063,\n",
|
1333 |
+
" (262, 114): 1064,\n",
|
1334 |
+
" (470, 104): 1065,\n",
|
1335 |
+
" (657, 296): 1066,\n",
|
1336 |
+
" (564, 592): 1067,\n",
|
1337 |
+
" (434, 308): 1068,\n",
|
1338 |
+
" (424, 579): 1069,\n",
|
1339 |
+
" (429, 263): 1070,\n",
|
1340 |
+
" (447, 271): 1071,\n",
|
1341 |
+
" (783, 107): 1072,\n",
|
1342 |
+
" (102, 434): 1073,\n",
|
1343 |
+
" (660, 976): 1074,\n",
|
1344 |
+
" (321, 420): 1075,\n",
|
1345 |
+
" (306, 498): 1076,\n",
|
1346 |
+
" (729, 121): 1077,\n",
|
1347 |
+
" (823, 294): 1078,\n",
|
1348 |
+
" (458, 325): 1079,\n",
|
1349 |
+
" (298, 181): 1080,\n",
|
1350 |
+
" (264, 172): 1081,\n",
|
1351 |
+
" (299, 108): 1082,\n",
|
1352 |
+
" (256, 150): 1083,\n",
|
1353 |
+
" (124, 124): 1084,\n",
|
1354 |
+
" (301, 111): 1085,\n",
|
1355 |
+
" (296, 591): 1086,\n",
|
1356 |
+
" (258, 131): 1087,\n",
|
1357 |
+
" (265, 109): 1088,\n",
|
1358 |
+
" (105, 387): 1089,\n",
|
1359 |
+
" (281, 164): 1090,\n",
|
1360 |
+
" (449, 110): 1091,\n",
|
1361 |
+
" (266, 100): 1092,\n",
|
1362 |
+
" (359, 441): 1093,\n",
|
1363 |
+
" (354, 275): 1094,\n",
|
1364 |
+
" (286, 375): 1095,\n",
|
1365 |
+
" (332, 275): 1096,\n",
|
1366 |
+
" (32, 54): 1097,\n",
|
1367 |
+
" (257, 143): 1098,\n",
|
1368 |
+
" (944, 621): 1099,\n",
|
1369 |
+
" (300, 108): 1100,\n",
|
1370 |
+
" (49, 57): 1101,\n",
|
1371 |
+
" (350, 275): 1102,\n",
|
1372 |
+
" (311, 468): 1103,\n",
|
1373 |
+
" (416, 673): 1104,\n",
|
1374 |
+
" (617, 296): 1105,\n",
|
1375 |
+
" (281, 170): 1106,\n",
|
1376 |
+
" (99, 303): 1107,\n",
|
1377 |
+
" (880, 99): 1108,\n",
|
1378 |
+
" (286, 592): 1109,\n",
|
1379 |
+
" (448, 101): 1110,\n",
|
1380 |
+
" (377, 486): 1111,\n",
|
1381 |
+
" (99, 497): 1112,\n",
|
1382 |
+
" (120, 116): 1113,\n",
|
1383 |
+
" (763, 500): 1114,\n",
|
1384 |
+
" (622, 107): 1115,\n",
|
1385 |
+
" (260, 131): 1116,\n",
|
1386 |
+
" (631, 563): 1117,\n",
|
1387 |
+
" (282, 305): 1118,\n",
|
1388 |
+
" (508, 396): 1119,\n",
|
1389 |
+
" (475, 692): 1120,\n",
|
1390 |
+
" (448, 318): 1121,\n",
|
1391 |
+
" (474, 408): 1122,\n",
|
1392 |
+
" (488, 986): 1123,\n",
|
1393 |
+
" (300, 101): 1124,\n",
|
1394 |
+
" (457, 269): 1125,\n",
|
1395 |
+
" (32, 32): 1126,\n",
|
1396 |
+
" (273, 98): 1127,\n",
|
1397 |
+
" (102, 512): 1128,\n",
|
1398 |
+
" (261, 171): 1129,\n",
|
1399 |
+
" (824, 1001): 1130,\n",
|
1400 |
+
" (325, 315): 1131,\n",
|
1401 |
+
" (270, 100): 1132,\n",
|
1402 |
+
" (695, 590): 1133,\n",
|
1403 |
+
" (282, 533): 1134,\n",
|
1404 |
+
" (423, 346): 1135,\n",
|
1405 |
+
" (531, 275): 1136,\n",
|
1406 |
+
" (265, 762): 1137,\n",
|
1407 |
+
" (301, 114): 1138,\n",
|
1408 |
+
" (329, 968): 1139,\n",
|
1409 |
+
" (284, 598): 1140,\n",
|
1410 |
+
" (579, 549): 1141,\n",
|
1411 |
+
" (511, 182): 1142,\n",
|
1412 |
+
" (278, 403): 1143,\n",
|
1413 |
+
" (105, 303): 1144,\n",
|
1414 |
+
" (357, 387): 1145,\n",
|
1415 |
+
" (619, 165): 1146,\n",
|
1416 |
+
" (266, 110): 1147,\n",
|
1417 |
+
" (111, 118): 1148,\n",
|
1418 |
+
" (276, 116): 1149,\n",
|
1419 |
+
" (451, 723): 1150,\n",
|
1420 |
+
" (265, 973): 1151,\n",
|
1421 |
+
" (819, 372): 1152,\n",
|
1422 |
+
" (451, 118): 1153,\n",
|
1423 |
+
" (310, 1058): 1154,\n",
|
1424 |
+
" (256, 171): 1155,\n",
|
1425 |
+
" (291, 170): 1156,\n",
|
1426 |
+
" (402, 306): 1157,\n",
|
1427 |
+
" (532, 115): 1158,\n",
|
1428 |
+
" (556, 115): 1159,\n",
|
1429 |
+
" (369, 275): 1160,\n",
|
1430 |
+
" (301, 104): 1161,\n",
|
1431 |
+
" (531, 315): 1162,\n",
|
1432 |
+
" (284, 784): 1163,\n",
|
1433 |
+
" (272, 110): 1164,\n",
|
1434 |
+
" (412, 101): 1165,\n",
|
1435 |
+
" (317, 274): 1166,\n",
|
1436 |
+
" (340, 109): 1167,\n",
|
1437 |
+
" (117, 99): 1168,\n",
|
1438 |
+
" (371, 110): 1169,\n",
|
1439 |
+
" (403, 110): 1170,\n",
|
1440 |
+
" (264, 161): 1171,\n",
|
1441 |
+
" (384, 319): 1172,\n",
|
1442 |
+
" (116, 305): 1173,\n",
|
1443 |
+
" (366, 104): 1174,\n",
|
1444 |
+
" (98, 323): 1175,\n",
|
1445 |
+
" (646, 303): 1176,\n",
|
1446 |
+
" (282, 104): 1177,\n",
|
1447 |
+
" (671, 346): 1178,\n",
|
1448 |
+
" (298, 136): 1179,\n",
|
1449 |
+
" (306, 115): 1180,\n",
|
1450 |
+
" (482, 115): 1181,\n",
|
1451 |
+
" (422, 115): 1182,\n",
|
1452 |
+
" (308, 110): 1183,\n",
|
1453 |
+
" (627, 285): 1184,\n",
|
1454 |
+
" (555, 770): 1185,\n",
|
1455 |
+
" (108, 318): 1186,\n",
|
1456 |
+
" (545, 101): 1187,\n",
|
1457 |
+
" (416, 718): 1188,\n",
|
1458 |
+
" (257, 137): 1189,\n",
|
1459 |
+
" (340, 103): 1190,\n",
|
1460 |
+
" (105, 333): 1191,\n",
|
1461 |
+
" (267, 107): 1192,\n",
|
1462 |
+
" (921, 115): 1193,\n",
|
1463 |
+
" (99, 121): 1194,\n",
|
1464 |
+
" (1015, 288): 1195,\n",
|
1465 |
+
" (116, 323): 1196,\n",
|
1466 |
+
" (529, 1114): 1197,\n",
|
1467 |
+
" (838, 463): 1198,\n",
|
1468 |
+
" (308, 115): 1199,\n",
|
1469 |
+
" (424, 1066): 1200,\n",
|
1470 |
+
" (267, 289): 1201,\n",
|
1471 |
+
" (406, 549): 1202,\n",
|
1472 |
+
" (674, 110): 1203,\n",
|
1473 |
+
" (401, 417): 1204,\n",
|
1474 |
+
" (391, 154): 1205,\n",
|
1475 |
+
" (292, 130): 1206,\n",
|
1476 |
+
" (1120, 1125): 1207,\n",
|
1477 |
+
" (560, 115): 1208,\n",
|
1478 |
+
" (281, 163): 1209,\n",
|
1479 |
+
" (101, 285): 1210,\n",
|
1480 |
+
" (505, 98): 1211,\n",
|
1481 |
+
" (318, 115): 1212,\n",
|
1482 |
+
" (278, 109): 1213,\n",
|
1483 |
+
" (508, 98): 1214,\n",
|
1484 |
+
" (291, 174): 1215,\n",
|
1485 |
+
" (349, 479): 1216,\n",
|
1486 |
+
" (571, 303): 1217,\n",
|
1487 |
+
" (775, 315): 1218,\n",
|
1488 |
+
" (266, 115): 1219,\n",
|
1489 |
+
" (110, 101): 1220,\n",
|
1490 |
+
" (103, 302): 1221,\n",
|
1491 |
+
" (508, 578): 1222,\n",
|
1492 |
+
" (117, 396): 1223,\n",
|
1493 |
+
" (403, 305): 1224,\n",
|
1494 |
+
" (321, 294): 1225,\n",
|
1495 |
+
" (491, 959): 1226,\n",
|
1496 |
+
" (111, 98): 1227,\n",
|
1497 |
+
" (895, 274): 1228,\n",
|
1498 |
+
" (1205, 902): 1229,\n",
|
1499 |
+
" (349, 830): 1230,\n",
|
1500 |
+
" (661, 270): 1231,\n",
|
1501 |
+
" (668, 112): 1232,\n",
|
1502 |
+
" (308, 396): 1233,\n",
|
1503 |
+
" (264, 167): 1234,\n",
|
1504 |
+
" (274, 296): 1235,\n",
|
1505 |
+
" (621, 100): 1236,\n",
|
1506 |
+
" (265, 99): 1237,\n",
|
1507 |
+
" (439, 414): 1238,\n",
|
1508 |
+
" (605, 467): 1239,\n",
|
1509 |
+
" (366, 465): 1240,\n",
|
1510 |
+
" (507, 147): 1241,\n",
|
1511 |
+
" (956, 1195): 1242,\n",
|
1512 |
+
" (302, 614): 1243,\n",
|
1513 |
+
" (116, 288): 1244,\n",
|
1514 |
+
" (518, 116): 1245,\n",
|
1515 |
+
" (697, 263): 1246,\n",
|
1516 |
+
" (341, 486): 1247,\n",
|
1517 |
+
" (320, 354): 1248,\n",
|
1518 |
+
" (331, 315): 1249,\n",
|
1519 |
+
" (733, 323): 1250,\n",
|
1520 |
+
" (843, 719): 1251,\n",
|
1521 |
+
" (276, 109): 1252,\n",
|
1522 |
+
" (446, 460): 1253,\n",
|
1523 |
+
" (310, 118): 1254,\n",
|
1524 |
+
" (534, 57): 1255,\n",
|
1525 |
+
" ...}"
|
1526 |
+
]
|
1527 |
+
},
|
1528 |
+
"execution_count": 10,
|
1529 |
+
"metadata": {},
|
1530 |
+
"output_type": "execute_result"
|
1531 |
+
}
|
1532 |
+
],
|
1533 |
+
"source": [
|
1534 |
+
"merges"
|
1535 |
+
]
|
1536 |
+
},
|
1537 |
+
{
|
1538 |
+
"cell_type": "code",
|
1539 |
+
"execution_count": 14,
|
1540 |
+
"metadata": {},
|
1541 |
+
"outputs": [],
|
1542 |
+
"source": [
|
1543 |
+
"def _encode_chunk(chunk_bytes: bytes, verbose=False) -> list[int]:\n",
|
1544 |
+
" tokens = list(chunk_bytes)\n",
|
1545 |
+
" while len(tokens) >= 2:\n",
|
1546 |
+
" if verbose:\n",
|
1547 |
+
" visualise_tokens([vocab[token] for token in tokens]) # token can be > 256 after merging\n",
|
1548 |
+
" stats = {}\n",
|
1549 |
+
" get_stats(tokens, stats)\n",
|
1550 |
+
" pair = min(stats, key=lambda p: merges.get(p, float(\"inf\")))\n",
|
1551 |
+
" if not pair in merges:\n",
|
1552 |
+
" break\n",
|
1553 |
+
" idx = merges[pair]\n",
|
1554 |
+
" tokens = merge(tokens, pair, idx)\n",
|
1555 |
+
" return tokens\n",
|
1556 |
+
"\n",
|
1557 |
+
"def encode_ordinary(text, verbose=False) -> list[int]:\n",
|
1558 |
+
" chunk_texts = re.findall(regex, text)\n",
|
1559 |
+
" ids_list = []\n",
|
1560 |
+
" for i, text in enumerate(chunk_texts):\n",
|
1561 |
+
" if verbose:\n",
|
1562 |
+
" print()\n",
|
1563 |
+
" print(f\"encoding chunk {i+1}/{len(chunk_texts)}: {text}\")\n",
|
1564 |
+
" chunk_bytes = text.encode(\"utf-8\") # raw bytes\n",
|
1565 |
+
" ids = _encode_chunk(chunk_bytes, verbose)\n",
|
1566 |
+
" ids_list.extend(ids)\n",
|
1567 |
+
" return ids_list\n",
|
1568 |
+
"\n",
|
1569 |
+
"def encode(text, verbose=False, allowed_special=\"none\") -> list[int]:\n",
|
1570 |
+
" special = {}\n",
|
1571 |
+
" if allowed_special == \"all\":\n",
|
1572 |
+
" special = special_tokens\n",
|
1573 |
+
" elif allowed_special == \"none\":\n",
|
1574 |
+
" special = {}\n",
|
1575 |
+
" elif allowed_special == \"none_raise\":\n",
|
1576 |
+
" special = {}\n",
|
1577 |
+
" assert all(token not in text for token in special_tokens), \"Text contains special tokens that are not allowed\"\n",
|
1578 |
+
" elif isinstance(allowed_special, set):\n",
|
1579 |
+
" special = {k: v for k, v in special_tokens.items() if k in allowed_special}\n",
|
1580 |
+
" else:\n",
|
1581 |
+
" raise ValueError(f\"allowed_special={allowed_special} not understood.\")\n",
|
1582 |
+
" if not special:\n",
|
1583 |
+
" return encode_ordinary(text, verbose)\n",
|
1584 |
+
" special_pattern = \"(\" + \"|\".join(re.escape(token) for token in special) + \")\"\n",
|
1585 |
+
" parts = re.split(special_pattern, text)\n",
|
1586 |
+
" ids = []\n",
|
1587 |
+
" for part in parts:\n",
|
1588 |
+
" if part in special:\n",
|
1589 |
+
" ids.append(special[part])\n",
|
1590 |
+
" else:\n",
|
1591 |
+
" ids.extend(encode_ordinary(part, verbose))\n",
|
1592 |
+
" return ids"
|
1593 |
+
]
|
1594 |
+
},
|
1595 |
+
{
|
1596 |
+
"cell_type": "code",
|
1597 |
+
"execution_count": 23,
|
1598 |
+
"metadata": {},
|
1599 |
+
"outputs": [
|
1600 |
+
{
|
1601 |
+
"name": "stdout",
|
1602 |
+
"output_type": "stream",
|
1603 |
+
"text": [
|
1604 |
+
"आज तो बहुत थक गया हूँ, ಸ್ವಲ್ಪ विश्रಾಂತಿ ಬೇಕು।\n",
|
1605 |
+
"[2637, 665, 666, 320, 443, 441, 2459, 531, 332, 752, 374, 435, 257, 129, 44, 1919, 2649, 462, 1462, 274, 389]\n",
|
1606 |
+
"\n",
|
1607 |
+
"मौसम कितना अच्छा है! ನೀವೂ ಹೊರಗೆ ಬನ್ನಿ, let's enjoy together.\n",
|
1608 |
+
"[392, 872, 369, 392, 293, 577, 873, 458, 725, 291, 155, 269, 466, 33, 1268, 321, 486, 2662, 933, 462, 383, 268, 44, 2127, 39, 115, 678, 106, 1302, 2510, 46]\n",
|
1609 |
+
"\n",
|
1610 |
+
"स्वल्पा adjust करो, बैंगलोर का ट्रैफिक ऐसा ही है।\n",
|
1611 |
+
"[369, 692, 354, 1156, 269, 756, 106, 648, 631, 320, 44, 443, 379, 1538, 354, 1751, 620, 877, 2627, 840, 510, 259, 144, 369, 269, 2581, 466, 389]\n",
|
1612 |
+
"\n",
|
1613 |
+
"ನೀವು ಚಹಾ ಕುಡಿತೀರಾ? मुझे एक cup चाहिए।\n",
|
1614 |
+
"[317, 464, 750, 833, 596, 279, 2112, 381, 488, 464, 296, 279, 63, 345, 580, 157, 275, 583, 286, 1989, 776, 1378, 642, 389]\n",
|
1615 |
+
"\n",
|
1616 |
+
"आज का काम पूरा करो, ನಾಳೆ ಎಲ್ಲಿಂದ ಆರಂಭಿಸೋದು ನೋಡಿ।\n",
|
1617 |
+
"[2637, 665, 620, 3169, 3092, 277, 269, 631, 320, 44, 424, 2810, 294, 555, 418, 764, 1986, 1529, 406, 428, 3004, 424, 2826, 268, 389]\n",
|
1618 |
+
"\n",
|
1619 |
+
"ಪಾರ್ಟಿ ಹೇಗೆ ಇತ್ತು? मुझे तो बहुत मजा आया!\n",
|
1620 |
+
"[515, 417, 526, 268, 452, 384, 933, 603, 472, 63, 345, 580, 157, 275, 666, 320, 443, 441, 2459, 345, 665, 269, 640, 257, 431, 33]\n",
|
1621 |
+
"\n",
|
1622 |
+
"ನಮ್ಮ ಚೂರು ಸಹನಶೀಲತೆಯನ್ನು ತೋರಿಸಿ, ये थोड़ी देर का मसला है।\n",
|
1623 |
+
"[317, 1552, 833, 486, 463, 1389, 317, 632, 464, 319, 327, 1122, 618, 1747, 2695, 44, 759, 275, 531, 320, 977, 315, 475, 1452, 620, 345, 369, 1515, 466, 389]\n",
|
1624 |
+
"\n",
|
1625 |
+
"ಸಮಯ ನಿಲ್ಲುತ್ತಿಲ್ಲ, जिंदगी में स्वल्पा मज़ा भी जरूरी है।\n",
|
1626 |
+
"[429, 479, 574, 424, 1289, 660, 1289, 44, 416, 1206, 633, 503, 315, 404, 1508, 354, 1156, 269, 345, 1440, 269, 1218, 416, 277, 435, 993, 466, 389]\n",
|
1627 |
+
"\n",
|
1628 |
+
"My name is Jeff Bezos, and I'm the owner of Amazon.<|endoftext|>\n",
|
1629 |
+
"[77, 121, 2142, 356, 676, 101, 695, 1717, 122, 556, 44, 312, 362, 39, 109, 272, 1323, 276, 304, 376, 109, 2730, 283, 46, 3256]\n",
|
1630 |
+
"\n"
|
1631 |
+
]
|
1632 |
+
}
|
1633 |
+
],
|
1634 |
+
"source": [
|
1635 |
+
"for test in tests:\n",
|
1636 |
+
" print(test)\n",
|
1637 |
+
" print(encode(test, allowed_special={\"<|endoftext|>\"}))\n",
|
1638 |
+
" print()"
|
1639 |
+
]
|
1640 |
+
},
|
1641 |
+
{
|
1642 |
+
"cell_type": "code",
|
1643 |
+
"execution_count": 25,
|
1644 |
+
"metadata": {},
|
1645 |
+
"outputs": [
|
1646 |
+
{
|
1647 |
+
"name": "stdout",
|
1648 |
+
"output_type": "stream",
|
1649 |
+
"text": [
|
1650 |
+
"Text: आज तो बहुत थक गया हूँ, ಸ್ವಲ್ಪ विश्रಾಂತಿ ಬೇಕು।\n",
|
1651 |
+
"Tokens: [2637, 665, 666, 320, 443, 441, 2459, 531, 332, 752, 374, 435, 257, 129, 44, 1919, 2649, 462, 1462, 274, 389]\n",
|
1652 |
+
"\n",
|
1653 |
+
"Unmerged length: 117\n",
|
1654 |
+
"Merged length: 21\n",
|
1655 |
+
"--------------------------------------------------\n",
|
1656 |
+
"Text: मौसम कितना अच्छा है! ನೀವೂ ಹೊರಗೆ ಬನ್ನಿ, let's enjoy together.\n",
|
1657 |
+
"Tokens: [392, 872, 369, 392, 293, 577, 873, 458, 725, 291, 155, 269, 466, 33, 1268, 321, 486, 2662, 933, 462, 383, 268, 44, 2127, 39, 115, 678, 106, 1302, 2510, 46]\n",
|
1658 |
+
"\n",
|
1659 |
+
"Unmerged length: 120\n",
|
1660 |
+
"Merged length: 31\n",
|
1661 |
+
"--------------------------------------------------\n",
|
1662 |
+
"Text: स्वल्पा adjust करो, बैंगलोर का ट्रैफिक ऐसा ही है।\n",
|
1663 |
+
"Tokens: [369, 692, 354, 1156, 269, 756, 106, 648, 631, 320, 44, 443, 379, 1538, 354, 1751, 620, 877, 2627, 840, 510, 259, 144, 369, 269, 2581, 466, 389]\n",
|
1664 |
+
"\n",
|
1665 |
+
"Unmerged length: 117\n",
|
1666 |
+
"Merged length: 28\n",
|
1667 |
+
"--------------------------------------------------\n",
|
1668 |
+
"Text: ನೀವು ಚಹಾ ಕುಡಿತೀರಾ? मुझे एक cup चाहिए।\n",
|
1669 |
+
"Tokens: [317, 464, 750, 833, 596, 279, 2112, 381, 488, 464, 296, 279, 63, 345, 580, 157, 275, 583, 286, 1989, 776, 1378, 642, 389]\n",
|
1670 |
+
"\n",
|
1671 |
+
"Unmerged length: 91\n",
|
1672 |
+
"Merged length: 24\n",
|
1673 |
+
"--------------------------------------------------\n",
|
1674 |
+
"Text: आज का काम पूरा करो, ನಾಳೆ ಎಲ್ಲಿಂದ ಆರಂಭಿಸೋದು ನೋಡಿ।\n",
|
1675 |
+
"Tokens: [2637, 665, 620, 3169, 3092, 277, 269, 631, 320, 44, 424, 2810, 294, 555, 418, 764, 1986, 1529, 406, 428, 3004, 424, 2826, 268, 389]\n",
|
1676 |
+
"\n",
|
1677 |
+
"Unmerged length: 126\n",
|
1678 |
+
"Merged length: 25\n",
|
1679 |
+
"--------------------------------------------------\n",
|
1680 |
+
"Text: ಪಾರ್ಟಿ ಹೇಗೆ ಇತ್ತು? मुझे तो बहुत मजा आया!\n",
|
1681 |
+
"Tokens: [515, 417, 526, 268, 452, 384, 933, 603, 472, 63, 345, 580, 157, 275, 666, 320, 443, 441, 2459, 345, 665, 269, 640, 257, 431, 33]\n",
|
1682 |
+
"\n",
|
1683 |
+
"Unmerged length: 102\n",
|
1684 |
+
"Merged length: 26\n",
|
1685 |
+
"--------------------------------------------------\n",
|
1686 |
+
"Text: ನಮ್ಮ ಚೂರು ಸಹನಶೀಲತೆಯನ್ನು ತೋರಿಸಿ, ये थोड़ी देर का मसला है।\n",
|
1687 |
+
"Tokens: [317, 1552, 833, 486, 463, 1389, 317, 632, 464, 319, 327, 1122, 618, 1747, 2695, 44, 759, 275, 531, 320, 977, 315, 475, 1452, 620, 345, 369, 1515, 466, 389]\n",
|
1688 |
+
"\n",
|
1689 |
+
"Unmerged length: 148\n",
|
1690 |
+
"Merged length: 30\n",
|
1691 |
+
"--------------------------------------------------\n",
|
1692 |
+
"Text: ಸಮಯ ನಿಲ್ಲುತ್ತಿಲ್ಲ, जिंदगी में स्वल्पा मज़ा भी जरूरी है।\n",
|
1693 |
+
"Tokens: [429, 479, 574, 424, 1289, 660, 1289, 44, 416, 1206, 633, 503, 315, 404, 1508, 354, 1156, 269, 345, 1440, 269, 1218, 416, 277, 435, 993, 466, 389]\n",
|
1694 |
+
"\n",
|
1695 |
+
"Unmerged length: 147\n",
|
1696 |
+
"Merged length: 28\n",
|
1697 |
+
"--------------------------------------------------\n",
|
1698 |
+
"Text: My name is Jeff Bezos, and I'm the owner of Amazon.<|endoftext|>\n",
|
1699 |
+
"Tokens: [77, 121, 2142, 356, 676, 101, 695, 1717, 122, 556, 44, 312, 362, 39, 109, 272, 1323, 276, 304, 376, 109, 2730, 283, 46, 3256]\n",
|
1700 |
+
"\n",
|
1701 |
+
"Unmerged length: 64\n",
|
1702 |
+
"Merged length: 25\n",
|
1703 |
+
"--------------------------------------------------\n"
|
1704 |
+
]
|
1705 |
+
}
|
1706 |
+
],
|
1707 |
+
"source": [
|
1708 |
+
"def test_encoding(text, verbose=False, allowed_special=None):\n",
|
1709 |
+
" print(f\"Text: {text}\")\n",
|
1710 |
+
" test_ids = encode(text, verbose=verbose, allowed_special=allowed_special)\n",
|
1711 |
+
" print(f\"Tokens: {test_ids}\")\n",
|
1712 |
+
" print(\"\")\n",
|
1713 |
+
" print(f\"Unmerged length: {len(text.encode('utf-8'))}\")\n",
|
1714 |
+
" print(f\"Merged length: {len(test_ids)}\")\n",
|
1715 |
+
" print(\"-\"*50)\n",
|
1716 |
+
"\n",
|
1717 |
+
"for test in tests:\n",
|
1718 |
+
" test_encoding(test, allowed_special=\"all\")"
|
1719 |
+
]
|
1720 |
+
},
|
1721 |
+
{
|
1722 |
+
"cell_type": "code",
|
1723 |
+
"execution_count": null,
|
1724 |
+
"metadata": {},
|
1725 |
+
"outputs": [],
|
1726 |
+
"source": []
|
1727 |
+
}
|
1728 |
+
],
|
1729 |
+
"metadata": {
|
1730 |
+
"kernelspec": {
|
1731 |
+
"display_name": "virtual",
|
1732 |
+
"language": "python",
|
1733 |
+
"name": "python3"
|
1734 |
+
},
|
1735 |
+
"language_info": {
|
1736 |
+
"codemirror_mode": {
|
1737 |
+
"name": "ipython",
|
1738 |
+
"version": 3
|
1739 |
+
},
|
1740 |
+
"file_extension": ".py",
|
1741 |
+
"mimetype": "text/x-python",
|
1742 |
+
"name": "python",
|
1743 |
+
"nbconvert_exporter": "python",
|
1744 |
+
"pygments_lexer": "ipython3",
|
1745 |
+
"version": "3.11.7"
|
1746 |
+
}
|
1747 |
+
},
|
1748 |
+
"nbformat": 4,
|
1749 |
+
"nbformat_minor": 2
|
1750 |
+
}
|
bpe_tok.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c6d964893049f0baaa2bb920e3c71b85ff43cad3c5b3a51352fb9177764d26b
|
3 |
+
size 24563
|
bpe_tok.vocab
ADDED
@@ -0,0 +1,3256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[\u0000] 0
|
2 |
+
[\u0001] 1
|
3 |
+
[\u0002] 2
|
4 |
+
[\u0003] 3
|
5 |
+
[\u0004] 4
|
6 |
+
[\u0005] 5
|
7 |
+
[\u0006] 6
|
8 |
+
[\u0007] 7
|
9 |
+
[\u0008] 8
|
10 |
+
[\u0009] 9
|
11 |
+
[\u000a] 10
|
12 |
+
[\u000b] 11
|
13 |
+
[\u000c] 12
|
14 |
+
[\u000d] 13
|
15 |
+
[\u000e] 14
|
16 |
+
[\u000f] 15
|
17 |
+
[\u0010] 16
|
18 |
+
[\u0011] 17
|
19 |
+
[\u0012] 18
|
20 |
+
[\u0013] 19
|
21 |
+
[\u0014] 20
|
22 |
+
[\u0015] 21
|
23 |
+
[\u0016] 22
|
24 |
+
[\u0017] 23
|
25 |
+
[\u0018] 24
|
26 |
+
[\u0019] 25
|
27 |
+
[\u001a] 26
|
28 |
+
[\u001b] 27
|
29 |
+
[\u001c] 28
|
30 |
+
[\u001d] 29
|
31 |
+
[\u001e] 30
|
32 |
+
[\u001f] 31
|
33 |
+
[ ] 32
|
34 |
+
[!] 33
|
35 |
+
["] 34
|
36 |
+
[#] 35
|
37 |
+
[$] 36
|
38 |
+
[%] 37
|
39 |
+
[&] 38
|
40 |
+
['] 39
|
41 |
+
[(] 40
|
42 |
+
[)] 41
|
43 |
+
[*] 42
|
44 |
+
[+] 43
|
45 |
+
[,] 44
|
46 |
+
[-] 45
|
47 |
+
[.] 46
|
48 |
+
[/] 47
|
49 |
+
[0] 48
|
50 |
+
[1] 49
|
51 |
+
[2] 50
|
52 |
+
[3] 51
|
53 |
+
[4] 52
|
54 |
+
[5] 53
|
55 |
+
[6] 54
|
56 |
+
[7] 55
|
57 |
+
[8] 56
|
58 |
+
[9] 57
|
59 |
+
[:] 58
|
60 |
+
[;] 59
|
61 |
+
[<] 60
|
62 |
+
[=] 61
|
63 |
+
[>] 62
|
64 |
+
[?] 63
|
65 |
+
[@] 64
|
66 |
+
[A] 65
|
67 |
+
[B] 66
|
68 |
+
[C] 67
|
69 |
+
[D] 68
|
70 |
+
[E] 69
|
71 |
+
[F] 70
|
72 |
+
[G] 71
|
73 |
+
[H] 72
|
74 |
+
[I] 73
|
75 |
+
[J] 74
|
76 |
+
[K] 75
|
77 |
+
[L] 76
|
78 |
+
[M] 77
|
79 |
+
[N] 78
|
80 |
+
[O] 79
|
81 |
+
[P] 80
|
82 |
+
[Q] 81
|
83 |
+
[R] 82
|
84 |
+
[S] 83
|
85 |
+
[T] 84
|
86 |
+
[U] 85
|
87 |
+
[V] 86
|
88 |
+
[W] 87
|
89 |
+
[X] 88
|
90 |
+
[Y] 89
|
91 |
+
[Z] 90
|
92 |
+
[[] 91
|
93 |
+
[\] 92
|
94 |
+
[]] 93
|
95 |
+
[^] 94
|
96 |
+
[_] 95
|
97 |
+
[`] 96
|
98 |
+
[a] 97
|
99 |
+
[b] 98
|
100 |
+
[c] 99
|
101 |
+
[d] 100
|
102 |
+
[e] 101
|
103 |
+
[f] 102
|
104 |
+
[g] 103
|
105 |
+
[h] 104
|
106 |
+
[i] 105
|
107 |
+
[j] 106
|
108 |
+
[k] 107
|
109 |
+
[l] 108
|
110 |
+
[m] 109
|
111 |
+
[n] 110
|
112 |
+
[o] 111
|
113 |
+
[p] 112
|
114 |
+
[q] 113
|
115 |
+
[r] 114
|
116 |
+
[s] 115
|
117 |
+
[t] 116
|
118 |
+
[u] 117
|
119 |
+
[v] 118
|
120 |
+
[w] 119
|
121 |
+
[x] 120
|
122 |
+
[y] 121
|
123 |
+
[z] 122
|
124 |
+
[{] 123
|
125 |
+
[|] 124
|
126 |
+
[}] 125
|
127 |
+
[~] 126
|
128 |
+
[\u007f] 127
|
129 |
+
[�] 128
|
130 |
+
[�] 129
|
131 |
+
[�] 130
|
132 |
+
[�] 131
|
133 |
+
[�] 132
|
134 |
+
[�] 133
|
135 |
+
[�] 134
|
136 |
+
[�] 135
|
137 |
+
[�] 136
|
138 |
+
[�] 137
|
139 |
+
[�] 138
|
140 |
+
[�] 139
|
141 |
+
[�] 140
|
142 |
+
[�] 141
|
143 |
+
[�] 142
|
144 |
+
[�] 143
|
145 |
+
[�] 144
|
146 |
+
[�] 145
|
147 |
+
[�] 146
|
148 |
+
[�] 147
|
149 |
+
[�] 148
|
150 |
+
[�] 149
|
151 |
+
[�] 150
|
152 |
+
[�] 151
|
153 |
+
[�] 152
|
154 |
+
[�] 153
|
155 |
+
[�] 154
|
156 |
+
[�] 155
|
157 |
+
[�] 156
|
158 |
+
[�] 157
|
159 |
+
[�] 158
|
160 |
+
[�] 159
|
161 |
+
[�] 160
|
162 |
+
[�] 161
|
163 |
+
[�] 162
|
164 |
+
[�] 163
|
165 |
+
[�] 164
|
166 |
+
[�] 165
|
167 |
+
[�] 166
|
168 |
+
[�] 167
|
169 |
+
[�] 168
|
170 |
+
[�] 169
|
171 |
+
[�] 170
|
172 |
+
[�] 171
|
173 |
+
[�] 172
|
174 |
+
[�] 173
|
175 |
+
[�] 174
|
176 |
+
[�] 175
|
177 |
+
[�] 176
|
178 |
+
[�] 177
|
179 |
+
[�] 178
|
180 |
+
[�] 179
|
181 |
+
[�] 180
|
182 |
+
[�] 181
|
183 |
+
[�] 182
|
184 |
+
[�] 183
|
185 |
+
[�] 184
|
186 |
+
[�] 185
|
187 |
+
[�] 186
|
188 |
+
[�] 187
|
189 |
+
[�] 188
|
190 |
+
[�] 189
|
191 |
+
[�] 190
|
192 |
+
[�] 191
|
193 |
+
[�] 192
|
194 |
+
[�] 193
|
195 |
+
[�] 194
|
196 |
+
[�] 195
|
197 |
+
[�] 196
|
198 |
+
[�] 197
|
199 |
+
[�] 198
|
200 |
+
[�] 199
|
201 |
+
[�] 200
|
202 |
+
[�] 201
|
203 |
+
[�] 202
|
204 |
+
[�] 203
|
205 |
+
[�] 204
|
206 |
+
[�] 205
|
207 |
+
[�] 206
|
208 |
+
[�] 207
|
209 |
+
[�] 208
|
210 |
+
[�] 209
|
211 |
+
[�] 210
|
212 |
+
[�] 211
|
213 |
+
[�] 212
|
214 |
+
[�] 213
|
215 |
+
[�] 214
|
216 |
+
[�] 215
|
217 |
+
[�] 216
|
218 |
+
[�] 217
|
219 |
+
[�] 218
|
220 |
+
[�] 219
|
221 |
+
[�] 220
|
222 |
+
[�] 221
|
223 |
+
[�] 222
|
224 |
+
[�] 223
|
225 |
+
[�] 224
|
226 |
+
[�] 225
|
227 |
+
[�] 226
|
228 |
+
[�] 227
|
229 |
+
[�] 228
|
230 |
+
[�] 229
|
231 |
+
[�] 230
|
232 |
+
[�] 231
|
233 |
+
[�] 232
|
234 |
+
[�] 233
|
235 |
+
[�] 234
|
236 |
+
[�] 235
|
237 |
+
[�] 236
|
238 |
+
[�] 237
|
239 |
+
[�] 238
|
240 |
+
[�] 239
|
241 |
+
[�] 240
|
242 |
+
[�] 241
|
243 |
+
[�] 242
|
244 |
+
[�] 243
|
245 |
+
[�] 244
|
246 |
+
[�] 245
|
247 |
+
[�] 246
|
248 |
+
[�] 247
|
249 |
+
[�] 248
|
250 |
+
[�] 249
|
251 |
+
[�] 250
|
252 |
+
[�] 251
|
253 |
+
[�] 252
|
254 |
+
[�] 253
|
255 |
+
[�] 254
|
256 |
+
[�] 255
|
257 |
+
[�][�] -> [�] 256
|
258 |
+
[�][�] -> [�] 257
|
259 |
+
[�][�] -> [�] 258
|
260 |
+
[ ][�] -> [ �] 259
|
261 |
+
[�][�] -> [�] 260
|
262 |
+
[ ][�] -> [ �] 261
|
263 |
+
[ ][t] -> [ t] 262
|
264 |
+
[�][�] -> [್] 263
|
265 |
+
[್][�] -> [್�] 264
|
266 |
+
[ ][a] -> [ a] 265
|
267 |
+
[h][e] -> [he] 266
|
268 |
+
[i][n] -> [in] 267
|
269 |
+
[�][�] -> [ಿ] 268
|
270 |
+
[�][�] -> [ा] 269
|
271 |
+
[r][e] -> [re] 270
|
272 |
+
[ಿ][�] -> [ಿ�] 271
|
273 |
+
[ t][he] -> [ the] 272
|
274 |
+
[ ][o] -> [ o] 273
|
275 |
+
[�][�] -> [ು] 274
|
276 |
+
[�][�] -> [े] 275
|
277 |
+
[e][r] -> [er] 276
|
278 |
+
[�][�] -> [र] 277
|
279 |
+
[ ][s] -> [ s] 278
|
280 |
+
[�][�] -> [ಾ] 279
|
281 |
+
[�][�] -> [्] 280
|
282 |
+
[ಾ][�] -> [ಾ�] 281
|
283 |
+
[a][t] -> [at] 282
|
284 |
+
[o][n] -> [on] 283
|
285 |
+
[ ][w] -> [ w] 284
|
286 |
+
[n][d] -> [nd] 285
|
287 |
+
[ ][c] -> [ c] 286
|
288 |
+
[�][�] -> [ि] 287
|
289 |
+
[e][n] -> [en] 288
|
290 |
+
[e][s] -> [es] 289
|
291 |
+
[i][s] -> [is] 290
|
292 |
+
[्][�] -> [्�] 291
|
293 |
+
[ि][�] -> [ि�] 292
|
294 |
+
[ �][�] -> [ क] 293
|
295 |
+
[�][�] -> [ೆ] 294
|
296 |
+
[i][t] -> [it] 295
|
297 |
+
[�][�] -> [ರ] 296
|
298 |
+
[o][r] -> [or] 297
|
299 |
+
[ा][�] -> [ा�] 298
|
300 |
+
[ ][b] -> [ b] 299
|
301 |
+
[ ][f] -> [ f] 300
|
302 |
+
[ ][p] -> [ p] 301
|
303 |
+
[a][n] -> [an] 302
|
304 |
+
[e][d] -> [ed] 303
|
305 |
+
[ o][f] -> [ of] 304
|
306 |
+
[in][g] -> [ing] 305
|
307 |
+
[a][l] -> [al] 306
|
308 |
+
[o][u] -> [ou] 307
|
309 |
+
[a][r] -> [ar] 308
|
310 |
+
[�][�] -> [ं] 309
|
311 |
+
[ ][in] -> [ in] 310
|
312 |
+
[ ][m] -> [ m] 311
|
313 |
+
[ a][nd] -> [ and] 312
|
314 |
+
[�][�] -> [ದ] 313
|
315 |
+
[ t][o] -> [ to] 314
|
316 |
+
[�][�] -> [ी] 315
|
317 |
+
[ ][d] -> [ d] 316
|
318 |
+
[�][�] -> [ನ] 317
|
319 |
+
[i][c] -> [ic] 318
|
320 |
+
[�][�] -> [ಲ] 319
|
321 |
+
[�][�] -> [ो] 320
|
322 |
+
[�][�] -> [ವ] 321
|
323 |
+
[ ][h] -> [ h] 322
|
324 |
+
[l][e] -> [le] 323
|
325 |
+
[a][s] -> [as] 324
|
326 |
+
[�][�] -> [न] 325
|
327 |
+
[i][on] -> [ion] 326
|
328 |
+
[�][�] -> [ತ] 327
|
329 |
+
[r][o] -> [ro] 328
|
330 |
+
[ t][h] -> [ th] 329
|
331 |
+
[�][�] -> [ಂ] 330
|
332 |
+
[ �][�] -> [ स] 331
|
333 |
+
[�][�] -> [क] 332
|
334 |
+
[en][t] -> [ent] 333
|
335 |
+
[ ][e] -> [ e] 334
|
336 |
+
[�][�] -> [ಗ] 335
|
337 |
+
[ ][l] -> [ l] 336
|
338 |
+
[�][�] -> [�] 337
|
339 |
+
[i][l] -> [il] 338
|
340 |
+
[ ][T] -> [ T] 339
|
341 |
+
[ ][re] -> [ re] 340
|
342 |
+
[ �][�] -> [ ಮ] 341
|
343 |
+
[o][m] -> [om] 342
|
344 |
+
[ ][n] -> [ n] 343
|
345 |
+
[c][t] -> [ct] 344
|
346 |
+
[ �][�] -> [ म] 345
|
347 |
+
[�][�] -> [ಳ] 346
|
348 |
+
[v][e] -> [ve] 347
|
349 |
+
[s][t] -> [st] 348
|
350 |
+
[ �][�] -> [ ಸ] 349
|
351 |
+
[�][�] -> [त] 350
|
352 |
+
[್�][�] -> [್ನ] 351
|
353 |
+
[್�][�] -> [್ತ] 352
|
354 |
+
[ b][e] -> [ be] 353
|
355 |
+
[�][�] -> [ल] 354
|
356 |
+
[l][y] -> [ly] 355
|
357 |
+
[ ][is] -> [ is] 356
|
358 |
+
[ ][g] -> [ g] 357
|
359 |
+
[�][�] -> [ಕ] 358
|
360 |
+
[ �][�] -> [ प] 359
|
361 |
+
[್�][�] -> [್ರ] 360
|
362 |
+
[ क][े] -> [ के] 361
|
363 |
+
[ ][I] -> [ I] 362
|
364 |
+
[्][र] -> [���र] 363
|
365 |
+
[ ][1] -> [ 1] 364
|
366 |
+
[s][e] -> [se] 365
|
367 |
+
[o][t] -> [ot] 366
|
368 |
+
[े][ं] -> [ें] 367
|
369 |
+
[್�][�] -> [್ಲ] 368
|
370 |
+
[�][�] -> [स] 369
|
371 |
+
[i][m] -> [im] 370
|
372 |
+
[o][w] -> [ow] 371
|
373 |
+
[i][d] -> [id] 372
|
374 |
+
[u][t] -> [ut] 373
|
375 |
+
[ �][�] -> [ ह] 374
|
376 |
+
[o][l] -> [ol] 375
|
377 |
+
[ ][A] -> [ A] 376
|
378 |
+
[್�][�] -> [್ಯ] 377
|
379 |
+
[c][e] -> [ce] 378
|
380 |
+
[�][�] -> [ै] 379
|
381 |
+
[ �][�] -> [ ಅ] 380
|
382 |
+
[�][�] -> [ಡ] 381
|
383 |
+
[ th][at] -> [ that] 382
|
384 |
+
[ನ][್ನ] -> [ನ್ನ] 383
|
385 |
+
[�][�] -> [ೇ] 384
|
386 |
+
[ ][S] -> [ S] 385
|
387 |
+
[ತ][್ತ] -> [ತ್ತ] 386
|
388 |
+
[e][t] -> [et] 387
|
389 |
+
[ �][�] -> [ ಪ] 388
|
390 |
+
[�][�] -> [।] 389
|
391 |
+
[ ][u] -> [ u] 390
|
392 |
+
[ೆ][�] -> [ೆ�] 391
|
393 |
+
[�][�] -> [म] 392
|
394 |
+
[ o][n] -> [ on] 393
|
395 |
+
[ f][or] -> [ for] 394
|
396 |
+
[ ][C] -> [ C] 395
|
397 |
+
[c][h] -> [ch] 396
|
398 |
+
[r][a] -> [ra] 397
|
399 |
+
[i][g] -> [ig] 398
|
400 |
+
[ s][t] -> [ st] 399
|
401 |
+
[�][�] -> [ु] 400
|
402 |
+
[ �][�] -> [ ಕ] 401
|
403 |
+
[at][ion] -> [ation] 402
|
404 |
+
[u][r] -> [ur] 403
|
405 |
+
[ म][ें] -> [ में] 404
|
406 |
+
[i][r] -> [ir] 405
|
407 |
+
[ಿ�][�] -> [ಿಸ] 406
|
408 |
+
[ ][y] -> [ y] 407
|
409 |
+
[ನ್ನ][ು] -> [ನ್ನು] 408
|
410 |
+
[ ][he] -> [ he] 409
|
411 |
+
[v][er] -> [ver] 410
|
412 |
+
[ T][he] -> [ The] 411
|
413 |
+
[a][d] -> [ad] 412
|
414 |
+
[ ][(] -> [ (] 413
|
415 |
+
[र][्�] -> [र्�] 414
|
416 |
+
[a][y] -> [ay] 415
|
417 |
+
[ �][�] -> [ ज] 416
|
418 |
+
[ಾ�][�] -> [ಾರ] 417
|
419 |
+
[ಲ][್ಲ] -> [ಲ್ಲ] 418
|
420 |
+
[e][l] -> [el] 419
|
421 |
+
[ಾ�][�] -> [ಾಗ] 420
|
422 |
+
[ಿ�][�] -> [ಿಯ] 421
|
423 |
+
[ a][s] -> [ as] 422
|
424 |
+
[�][�] -> [ೊ] 423
|
425 |
+
[ �][�] -> [ ನ] 424
|
426 |
+
[ಿ�][�] -> [ಿದ] 425
|
427 |
+
[a][m] -> [am] 426
|
428 |
+
[it][h] -> [ith] 427
|
429 |
+
[�][�] -> [ೋ] 428
|
430 |
+
[�][�] -> [ಸ] 429
|
431 |
+
[ ][2] -> [ 2] 430
|
432 |
+
[�][ा] -> [�ा] 431
|
433 |
+
[�][�] -> [प] 432
|
434 |
+
[ a][re] -> [ are] 433
|
435 |
+
[u][l] -> [ul] 434
|
436 |
+
[�][�] -> [ू] 435
|
437 |
+
[ a][n] -> [ an] 436
|
438 |
+
[ w][ith] -> [ with] 437
|
439 |
+
[ ][it] -> [ it] 438
|
440 |
+
[ �][�] -> [ व] 439
|
441 |
+
[ w][h] -> [ wh] 440
|
442 |
+
[�][�] -> [ह] 441
|
443 |
+
[ ][M] -> [ M] 442
|
444 |
+
[ �][�] -> [ ब] 443
|
445 |
+
[ y][ou] -> [ you] 444
|
446 |
+
[ �][�] -> [ ए] 445
|
447 |
+
[ಗ][ಳ] -> [ಗಳ] 446
|
448 |
+
[ �][�] -> [ ವ] 447
|
449 |
+
[i][f] -> [if] 448
|
450 |
+
[t][er] -> [ter] 449
|
451 |
+
[ �][�] -> [ न] 450
|
452 |
+
[ p][ro] -> [ pro] 451
|
453 |
+
[ �][�] -> [ ಹ] 452
|
454 |
+
[ c][on] -> [ con] 453
|
455 |
+
[ ][P] -> [ P] 454
|
456 |
+
[ं][�] -> [ं�] 455
|
457 |
+
[ a][l] -> [ al] 456
|
458 |
+
[ा][र] -> [ार] 457
|
459 |
+
[ �][�] -> [ अ] 458
|
460 |
+
[ o][r] -> [ or] 459
|
461 |
+
[ಲ್ಲ][ಿ] -> [ಲ್ಲಿ] 460
|
462 |
+
[�][�] -> [ಟ] 461
|
463 |
+
[ �][�] -> [ ಬ] 462
|
464 |
+
[ರ][ು] -> [ರು] 463
|
465 |
+
[�][�] -> [ೀ] 464
|
466 |
+
[he][r] -> [her] 465
|
467 |
+
[ ह][ै] -> [ है] 466
|
468 |
+
[ो][ं] -> [ों] 467
|
469 |
+
[o][d] -> [od] 468
|
470 |
+
[er][s] -> [ers] 469
|
471 |
+
[ ][W] -> [ W] 470
|
472 |
+
[ ][B] -> [ B] 471
|
473 |
+
[ತ್ತ][ು] -> [ತ್ತು] 472
|
474 |
+
[ ][H] -> [ H] 473
|
475 |
+
[ೆ�][�] -> [ೆಯ] 474
|
476 |
+
[ �][�] -> [ द] 475
|
477 |
+
[ �][�] -> [ औ] 476
|
478 |
+
[e][m] -> [em] 477
|
479 |
+
[ �][�] -> [ ल] 478
|
480 |
+
[�][�] -> [ಮ] 479
|
481 |
+
[�][�] -> [’] 480
|
482 |
+
[ w][e] -> [ we] 481
|
483 |
+
[a][nd] -> [and] 482
|
484 |
+
[p][e] -> [pe] 483
|
485 |
+
[ ಮ][ತ್ತು] -> [ ಮತ್ತು] 484
|
486 |
+
[ರ][್�] -> [ರ್�] 485
|
487 |
+
[�][�] -> [ೂ] 486
|
488 |
+
[ औ][र] -> [ और] 487
|
489 |
+
[ಿ�][�] -> [ಿತ] 488
|
490 |
+
[ಂ][ದ] -> [ಂದ] 489
|
491 |
+
[a][g] -> [ag] 490
|
492 |
+
[ �][�] -> [ र] 491
|
493 |
+
[ w][as] -> [ was] 492
|
494 |
+
[ �][�] -> [ ग] 493
|
495 |
+
[ e][x] -> [ ex] 494
|
496 |
+
[l][d] -> [ld] 495
|
497 |
+
[ಿ�][�] -> [ಿಕ] 496
|
498 |
+
[es][s] -> [ess] 497
|
499 |
+
[t][h] -> [th] 498
|
500 |
+
[ �][�] -> [ ಆ] 499
|
501 |
+
[o][p] -> [op] 500
|
502 |
+
[k][e] -> [ke] 501
|
503 |
+
[il][l] -> [ill] 502
|
504 |
+
[�][�] -> [ग] 503
|
505 |
+
[ ][E] -> [ E] 504
|
506 |
+
[r][i] -> [ri] 505
|
507 |
+
[्�][�] -> [्य] 506
|
508 |
+
[ ][�] -> [ �] 507
|
509 |
+
[ s][u] -> [ su] 508
|
510 |
+
[ c][om] -> [ com] 509
|
511 |
+
[ि�][�] -> [िक] 510
|
512 |
+
[े][�] -> [े�] 511
|
513 |
+
[o][re] -> [ore] 512
|
514 |
+
[is][t] -> [ist] 513
|
515 |
+
[ ][v] -> [ v] 514
|
516 |
+
[�][�] -> [ಪ] 515
|
517 |
+
[ h][a] -> [ ha] 516
|
518 |
+
[es][t] -> [est] 517
|
519 |
+
[ a][t] -> [ at] 518
|
520 |
+
[u][m] -> [um] 519
|
521 |
+
[ा�][�] -> [ान] 520
|
522 |
+
[0][0] -> [00] 521
|
523 |
+
[ b][y] -> [ by] 522
|
524 |
+
[ ]["] -> [ "] 523
|
525 |
+
[at][e] -> [ate] 524
|
526 |
+
[a][b] -> [ab] 525
|
527 |
+
[್�][�] -> [್ಟ] 526
|
528 |
+
[ಿ�][�] -> [ಿಗ] 527
|
529 |
+
[u][s] -> [us] 528
|
530 |
+
[ d][e] -> [ de] 529
|
531 |
+
[a][in] -> [ain] 530
|
532 |
+
[ �][�] -> [ थ] 531
|
533 |
+
[u][re] -> [ure] 532
|
534 |
+
[i][ve] -> [ive] 533
|
535 |
+
[ 1][9] -> [ 19] 534
|
536 |
+
[u][n] -> [un] 535
|
537 |
+
[ ][D] -> [ D] 536
|
538 |
+
[i][es] -> [ies] 537
|
539 |
+
[ig][h] -> [igh] 538
|
540 |
+
[�][�] -> [ಬ] 539
|
541 |
+
[a][c] -> [ac] 540
|
542 |
+
[ro][m] -> [rom] 541
|
543 |
+
[an][t] -> [ant] 542
|
544 |
+
[ s][e] -> [ se] 543
|
545 |
+
[ ][L] -> [ L] 544
|
546 |
+
[ ][R] -> [ R] 545
|
547 |
+
[�][�] -> [व] 546
|
548 |
+
[n][t] -> [nt] 547
|
549 |
+
[ प][्र] -> [ प्र] 548
|
550 |
+
[ು][ವ] -> [ುವ] 549
|
551 |
+
[ �][�] -> [ उ] 550
|
552 |
+
[ क][ो] -> [ को] 551
|
553 |
+
[ c][h] -> [ ch] 552
|
554 |
+
[it][y] -> [ity] 553
|
555 |
+
[ c][an] -> [ can] 554
|
556 |
+
[ �][�] -> [ ಎ] 555
|
557 |
+
[o][s] -> [os] 556
|
558 |
+
[ n][ot] -> [ not] 557
|
559 |
+
[ �][�] -> [ ರ] 558
|
560 |
+
[ ಪ][್ರ] -> [ ಪ್ರ] 559
|
561 |
+
[m][ent] -> [ment] 560
|
562 |
+
[q][u] -> [qu] 561
|
563 |
+
[ ][F] -> [ F] 562
|
564 |
+
[न][े] -> [ने] 563
|
565 |
+
[ s][h] -> [ sh] 564
|
566 |
+
[ ][N] -> [ N] 565
|
567 |
+
[ि�][�ा] -> [िया] 566
|
568 |
+
[i][v] -> [iv] 567
|
569 |
+
[or][t] -> [ort] 568
|
570 |
+
[ स][े] -> [ से] 569
|
571 |
+
[ f][rom] -> [ from] 570
|
572 |
+
[ n][e] -> [ ne] 571
|
573 |
+
[ಾ�][�] -> [ಾನ] 572
|
574 |
+
[ಿ�][�] -> [ಿನ] 573
|
575 |
+
[�][�] -> [ಯ] 574
|
576 |
+
[ क][ी] -> [ की] 575
|
577 |
+
[o][c] -> [oc] 576
|
578 |
+
[ि�][�] -> [ित] 577
|
579 |
+
[p][p] -> [pp] 578
|
580 |
+
[ಿ�][�] -> [ಿರ] 579
|
581 |
+
[ु][�] -> [ु�] 580
|
582 |
+
[�][�] -> [ೈ] 581
|
583 |
+
[ ha][ve] -> [ have] 582
|
584 |
+
[ ए][क] -> [ एक] 583
|
585 |
+
[u][d] -> [ud] 584
|
586 |
+
[ ][r] -> [ r] 585
|
587 |
+
[�][�] -> [य] 586
|
588 |
+
[ ][G] -> [ G] 587
|
589 |
+
[ಕ][್�] -> [ಕ್�] 588
|
590 |
+
[�][�] -> [ब] 589
|
591 |
+
[e][ct] -> [ect] 590
|
592 |
+
[�][�] -> [ಣ] 591
|
593 |
+
[ou][ld] -> [ould] 592
|
594 |
+
[ ][-] -> [ -] 593
|
595 |
+
[ar][t] -> [art] 594
|
596 |
+
[re][s] -> [res] 595
|
597 |
+
[�][�] -> [ಹ] 596
|
598 |
+
[ಿ�][�] -> [ಿಂ] 597
|
599 |
+
[el][l] -> [ell] 598
|
600 |
+
[ ][le] -> [ le] 599
|
601 |
+
[igh][t] -> [ight] 600
|
602 |
+
[ನ][್] -> [ನ್] 601
|
603 |
+
[ ][k] -> [ k] 602
|
604 |
+
[ �][�] -> [ ಇ] 603
|
605 |
+
[�][�] -> [़] 604
|
606 |
+
[ि�][�] -> [िय] 605
|
607 |
+
[ th][is] -> [ this] 606
|
608 |
+
[ic][al] -> [ical] 607
|
609 |
+
[i][al] -> [ial] 608
|
610 |
+
[�][�] -> [”] 609
|
611 |
+
[್�][�] -> [್ದ] 610
|
612 |
+
[�][�] -> [ट] 611
|
613 |
+
[ �][�] -> [ “] 612
|
614 |
+
[ ग][�] -> [ ग�] 613
|
615 |
+
[g][e] -> [ge] 614
|
616 |
+
[ಾ�][�] -> [ಾದ] 615
|
617 |
+
[g][h] -> [gh] 616
|
618 |
+
[ ಅ][ವ] -> [ ಅವ] 617
|
619 |
+
[ �][�] -> [ ತ] 618
|
620 |
+
[ स][्�] -> [ स्�] 619
|
621 |
+
[ क][ा] -> [ का] 620
|
622 |
+
[e][re] -> [ere] 621
|
623 |
+
[ w][or] -> [ wor] 622
|
624 |
+
[om][e] -> [ome] 623
|
625 |
+
[ic][h] -> [ich] 624
|
626 |
+
[ a][b] -> [ ab] 625
|
627 |
+
[�][�] -> [ड] 626
|
628 |
+
[r][ou] -> [rou] 627
|
629 |
+
[ಾ�][�] -> [ಾಯ] 628
|
630 |
+
[ ][O] -> [ O] 629
|
631 |
+
[in][e] -> [ine] 630
|
632 |
+
[ क][र] -> [ कर] 631
|
633 |
+
[�][�] -> [ಶ] 632
|
634 |
+
[�][�] -> [द] 633
|
635 |
+
[o][g] -> [og] 634
|
636 |
+
[a][p] -> [ap] 635
|
637 |
+
[al][l] -> [all] 636
|
638 |
+
[�][�] -> [ॉ] 637
|
639 |
+
[ar][d] -> [ard] 638
|
640 |
+
[ा�][�] -> [ाल] 639
|
641 |
+
[ �][�] -> [ आ] 640
|
642 |
+
[ in][t] -> [ int] 641
|
643 |
+
[ि�][�] -> [िए] 642
|
644 |
+
[्�][�] -> [्त] 643
|
645 |
+
[ou][t] -> [out] 644
|
646 |
+
[ou][r] -> [our] 645
|
647 |
+
[ u][s] -> [ us] 646
|
648 |
+
[ p][l] -> [ pl] 647
|
649 |
+
[u][st] -> [ust] 648
|
650 |
+
[ I][n] -> [ In] 649
|
651 |
+
[ the][ir] -> [ their] 650
|
652 |
+
[ �][�] -> [ ಗ] 651
|
653 |
+
[a][k] -> [ak] 652
|
654 |
+
[ि�][�] -> [िल] 653
|
655 |
+
[i][p] -> [ip] 654
|
656 |
+
[ �][�] -> [ ಜ] 655
|
657 |
+
[ಾಗ][ಿ] -> [ಾಗಿ] 656
|
658 |
+
[ಂ][ತ] -> [ಂತ] 657
|
659 |
+
[ the][y] -> [ they] 658
|
660 |
+
[o][st] -> [ost] 659
|
661 |
+
[ು][ತ್ತ] -> [ುತ್ತ] 660
|
662 |
+
[ w][he] -> [ whe] 661
|
663 |
+
[t][her] -> [ther] 662
|
664 |
+
[ಾ�][�] -> [ಾಂ] 663
|
665 |
+
[at][ed] -> [ated] 664
|
666 |
+
[�][�] -> [ज] 665
|
667 |
+
[ �][�] -> [ त] 666
|
668 |
+
[ �][�] -> [ इ] 667
|
669 |
+
[ ][im] -> [ im] 668
|
670 |
+
[ 2][0] -> [ 20] 669
|
671 |
+
[as][t] -> [ast] 670
|
672 |
+
[್�][�] -> [್ಗ] 671
|
673 |
+
[ �][�] -> [ श] 672
|
674 |
+
[ि�][�] -> [िस] 673
|
675 |
+
[ ][U] -> [ U] 674
|
676 |
+
[ d][o] -> [ do] 675
|
677 |
+
[ ][J] -> [ J] 676
|
678 |
+
[ಿತ][ು] -> [ಿತು] 677
|
679 |
+
[ ][en] -> [ en] 678
|
680 |
+
[ಾ�][�] -> [ಾಲ] 679
|
681 |
+
[ T][h] -> [ Th] 680
|
682 |
+
[್�][�] -> [್ವ] 681
|
683 |
+
[�][�] -> [ಜ] 682
|
684 |
+
[ವ][ನ್ನು] -> [ವನ್ನು] 683
|
685 |
+
[a][re] -> [are] 684
|
686 |
+
[ou][s] -> [ous] 685
|
687 |
+
[ थ][ा] -> [ था] 686
|
688 |
+
[ you][r] -> [ your] 687
|
689 |
+
[ al][l] -> [ all] 688
|
690 |
+
[p][t] -> [pt] 689
|
691 |
+
[a][ct] -> [act] 690
|
692 |
+
[ion][s] -> [ions] 691
|
693 |
+
[्�][�] -> [्व] 692
|
694 |
+
[ m][e] -> [ me] 693
|
695 |
+
[p][l] -> [pl] 694
|
696 |
+
[f][f] -> [ff] 695
|
697 |
+
[ wh][ich] -> [ which] 696
|
698 |
+
[್�][�] -> [್ಸ] 697
|
699 |
+
[ು][ದ] -> [ುದ] 698
|
700 |
+
[on][g] -> [ong] 699
|
701 |
+
[p][er] -> [per] 700
|
702 |
+
[an][s] -> [ans] 701
|
703 |
+
[e][ar] -> [ear] 702
|
704 |
+
[ा�][�] -> [ाम] 703
|
705 |
+
[ h][is] -> [ his] 704
|
706 |
+
[ w][ill] -> [ will] 705
|
707 |
+
[a][ke] -> [ake] 706
|
708 |
+
[ b][ut] -> [ but] 707
|
709 |
+
[ प][र] -> [ पर] 708
|
710 |
+
[ �][�] -> [ ಒ] 709
|
711 |
+
[ �][�] -> [ ದ] 710
|
712 |
+
[ಿ�][�] -> [ಿಲ] 711
|
713 |
+
[ g][o] -> [ go] 712
|
714 |
+
[ h][as] -> [ has] 713
|
715 |
+
[ag][e] -> [age] 714
|
716 |
+
[id][e] -> [ide] 715
|
717 |
+
[ m][ore] -> [ more] 716
|
718 |
+
[c][l] -> [cl] 717
|
719 |
+
[ा�][�] -> [ात] 718
|
720 |
+
[ಿಗ][ೆ] -> [ಿಗೆ] 719
|
721 |
+
[ಂ][ಡ] -> [ಂಡ] 720
|
722 |
+
[त][ा] -> [ता] 721
|
723 |
+
[ ल][िए] -> [ लिए] 722
|
724 |
+
[d][u] -> [du] 723
|
725 |
+
[ we][re] -> [ were] 724
|
726 |
+
[�][�] -> [च] 725
|
727 |
+
[in][d] -> [ind] 726
|
728 |
+
[्�][�] -> [्ट] 727
|
729 |
+
[k][s] -> [ks] 728
|
730 |
+
[ m][an] -> [ man] 729
|
731 |
+
[ com][p] -> [ comp] 730
|
732 |
+
[ರ][್] -> [ರ್] 731
|
733 |
+
[ि�][�] -> [िन] 732
|
734 |
+
[i][b] -> [ib] 733
|
735 |
+
[ re][s] -> [ res] 734
|
736 |
+
[a][v] -> [av] 735
|
737 |
+
[ क][िया] -> [ किया] 736
|
738 |
+
[r][u] -> [ru] 737
|
739 |
+
[ಿಸ][ಲ] -> [ಿಸಲ] 738
|
740 |
+
[ಿದ][್ದ] -> [ಿದ್ದ] 739
|
741 |
+
[ con][t] -> [ cont] 740
|
742 |
+
[ab][le] -> [able] 741
|
743 |
+
[ o][ut] -> [ out] 742
|
744 |
+
[ent][s] -> [ents] 743
|
745 |
+
[ ][j] -> [ j] 744
|
746 |
+
[್�][�] -> [್ಮ] 745
|
747 |
+
[ar][y] -> [ary] 746
|
748 |
+
[r][y] -> [ry] 747
|
749 |
+
[�][�] -> [ण] 748
|
750 |
+
[i][z] -> [iz] 749
|
751 |
+
[ವ][ು] -> [ವು] 750
|
752 |
+
[ h][ad] -> [ had] 751
|
753 |
+
[ ग�][�ा] -> [ गया] 752
|
754 |
+
[ c][l] -> [ cl] 753
|
755 |
+
[्�][�] -> [्ष] 754
|
756 |
+
[ s][o] -> [ so] 755
|
757 |
+
[ a][d] -> [ ad] 756
|
758 |
+
[ he][r] -> [ her] 757
|
759 |
+
[i][e] -> [ie] 758
|
760 |
+
[ �][�] -> [ य] 759
|
761 |
+
[or][m] -> [orm] 760
|
762 |
+
[s][o] -> [so] 761
|
763 |
+
[c][c] -> [cc] 762
|
764 |
+
[ve][l] -> [vel] 763
|
765 |
+
[ಿಂ][ದ] -> [ಿಂದ] 764
|
766 |
+
[i][a] -> [ia] 765
|
767 |
+
[as][s] -> [ass] 766
|
768 |
+
[a][ch] -> [ach] 767
|
769 |
+
[ न][े] -> [ ने] 768
|
770 |
+
[ on][e] -> [ one] 769
|
771 |
+
[ಂದ][ು] -> [ಂದು] 770
|
772 |
+
[il][d] -> [ild] 771
|
773 |
+
[ �][�] -> [ ख] 772
|
774 |
+
[ I][t] -> [ It] 773
|
775 |
+
[ ][3] -> [ 3] 774
|
776 |
+
[ �][�] -> [ भ] 775
|
777 |
+
[ �][�] -> [ च] 776
|
778 |
+
[ul][t] -> [ult] 777
|
779 |
+
[ �][�] -> [ ಉ] 778
|
780 |
+
[i][re] -> [ire] 779
|
781 |
+
[�][�] -> [ಧ] 780
|
782 |
+
[ಾ�][�] -> [ಾಡ] 781
|
783 |
+
[ಾ�][�] -> [ಾಸ] 782
|
784 |
+
[o][o] -> [oo] 783
|
785 |
+
[at][er] -> [ater] 784
|
786 |
+
[ �][�] -> [ ಯ] 785
|
787 |
+
[ o][ther] -> [ other] 786
|
788 |
+
[ wh][o] -> [ who] 787
|
789 |
+
[an][ce] -> [ance] 788
|
790 |
+
[ �][�] -> [ फ] 789
|
791 |
+
[ t][e] -> [ te] 790
|
792 |
+
[on][e] -> [one] 791
|
793 |
+
[en][ce] -> [ence] 792
|
794 |
+
[े][ल] -> [ेल] 793
|
795 |
+
[l][ow] -> [low] 794
|
796 |
+
[ 2][00] -> [ 200] 795
|
797 |
+
[ d][is] -> [ dis] 796
|
798 |
+
[ation][s] -> [ations] 797
|
799 |
+
[ಿದ][ೆ] -> [ಿದೆ] 798
|
800 |
+
[a][ce] -> [ace] 799
|
801 |
+
[�][�] -> [श] 800
|
802 |
+
[b][er] -> [ber] 801
|
803 |
+
[ा�][�] -> [ास] 802
|
804 |
+
[am][e] -> [ame] 803
|
805 |
+
[ದ][ಲ್ಲಿ] -> [ದಲ್ಲಿ] 804
|
806 |
+
[ಾ�][�] -> [ಾವ] 805
|
807 |
+
[p][ort] -> [port] 806
|
808 |
+
[u][al] -> [ual] 807
|
809 |
+
[ 1][8] -> [ 18] 808
|
810 |
+
[ 20][1] -> [ 201] 809
|
811 |
+
[re][at] -> [reat] 810
|
812 |
+
[is][h] -> [ish] 811
|
813 |
+
[n][ow] -> [now] 812
|
814 |
+
[ the][m] -> [ them] 813
|
815 |
+
[ t][im] -> [ tim] 814
|
816 |
+
[್�][�] -> [್ಪ] 815
|
817 |
+
[ al][so] -> [ also] 816
|
818 |
+
[�][�] -> [ಭ] 817
|
819 |
+
[ �][�] -> [ ಶ] 818
|
820 |
+
[ s][a] -> [ sa] 819
|
821 |
+
[an][g] -> [ang] 820
|
822 |
+
[्�][�] -> [्स] 821
|
823 |
+
[ver][y] -> [very] 822
|
824 |
+
[ಕ್�][�] -> [ಕ್ಕ] 823
|
825 |
+
[ उ][न] -> [ उन] 824
|
826 |
+
[ u][n] -> [ un] 825
|
827 |
+
[ ಸ][ಂ] -> [ ಸಂ] 826
|
828 |
+
[್�][�] -> [್ಕ] 827
|
829 |
+
[re][e] -> [ree] 828
|
830 |
+
[ct][ion] -> [ction] 829
|
831 |
+
[್�][�] -> [್ಥ] 830
|
832 |
+
[ಗಳ][ು] -> [ಗಳು] 831
|
833 |
+
[ic][e] -> [ice] 832
|
834 |
+
[ �][�] -> [ ಚ] 833
|
835 |
+
[ u][p] -> [ up] 834
|
836 |
+
[ S][t] -> [ St] 835
|
837 |
+
[ p][er] -> [ per] 836
|
838 |
+
[ p][re] -> [ pre] 837
|
839 |
+
[ಿಸ][ಿದ] -> [ಿಸಿದ] 838
|
840 |
+
[ig][n] -> [ign] 839
|
841 |
+
[�][�] -> [फ] 840
|
842 |
+
[ स][ा�] -> [ सा�] 841
|
843 |
+
[ा�][�] -> [ाद] 842
|
844 |
+
[ೊ][ಂದ] -> [ೊಂದ] 843
|
845 |
+
[ e][v] -> [ ev] 844
|
846 |
+
[ ab][out] -> [ about] 845
|
847 |
+
[�][�] -> [ಚ] 846
|
848 |
+
[ the][re] -> [ there] 847
|
849 |
+
[ಾ�][�] -> [ಾಮ] 848
|
850 |
+
[ s][y] -> [ sy] 849
|
851 |
+
[ p][art] -> [ part] 850
|
852 |
+
[ac][k] -> [ack] 851
|
853 |
+
[ a][r] -> [ ar] 852
|
854 |
+
[ಗಳ][ನ್ನು] -> [ಗಳನ್ನು] 853
|
855 |
+
[c][es] -> [ces] 854
|
856 |
+
[ g][en] -> [ gen] 855
|
857 |
+
[in][t] -> [int] 856
|
858 |
+
[ರ][ೆ] -> [ರೆ] 857
|
859 |
+
[�][�] -> [ध] 858
|
860 |
+
[i][ke] -> [ike] 859
|
861 |
+
[ व][ि�] -> [ वि�] 860
|
862 |
+
[ou][nd] -> [ound] 861
|
863 |
+
[ �][�] -> [ ड] 862
|
864 |
+
[ Th][is] -> [ This] 863
|
865 |
+
[ com][m] -> [ comm] 864
|
866 |
+
[res][s] -> [ress] 865
|
867 |
+
[it][e] -> [ite] 866
|
868 |
+
[o][se] -> [ose] 867
|
869 |
+
[ whe][n] -> [ when] 868
|
870 |
+
[i][le] -> [ile] 869
|
871 |
+
[a][u] -> [au] 870
|
872 |
+
[ou][nt] -> [ount] 871
|
873 |
+
[�][�] -> [ौ] 872
|
874 |
+
[न][ा] -> [ना] 873
|
875 |
+
[ int][o] -> [ into] 874
|
876 |
+
[ s][c] -> [ sc] 875
|
877 |
+
[ है][ं] -> [ हैं] 876
|
878 |
+
[ �][�] -> [ ट] 877
|
879 |
+
[ou][gh] -> [ough] 878
|
880 |
+
[ स][ं�] -> [ सं�] 879
|
881 |
+
[ s][pe] -> [ spe] 880
|
882 |
+
[ s][he] -> [ she] 881
|
883 |
+
[o][od] -> [ood] 882
|
884 |
+
[ol][d] -> [old] 883
|
885 |
+
[ू][प] -> [ूप] 884
|
886 |
+
[ स][म] -> [ सम] 885
|
887 |
+
[ �][�] -> [ ಲ] 886
|
888 |
+
[ st][ud] -> [ stud] 887
|
889 |
+
[ay][s] -> [ays] 888
|
890 |
+
[ s][ome] -> [ some] 889
|
891 |
+
[ th][an] -> [ than] 890
|
892 |
+
[a][il] -> [ail] 891
|
893 |
+
[as][e] -> [ase] 892
|
894 |
+
[nd][er] -> [nder] 893
|
895 |
+
[ p][e] -> [ pe] 894
|
896 |
+
[ ಇ][ದ] -> [ ಇದ] 895
|
897 |
+
[ಿ�][�] -> [ಿವ] 896
|
898 |
+
[ಿಗ][ಳ] -> [ಿಗಳ] 897
|
899 |
+
[if][f] -> [iff] 898
|
900 |
+
[ ][Y] -> [ Y] 899
|
901 |
+
[्�][�] -> [्ल] 900
|
902 |
+
[u][e] -> [ue] 901
|
903 |
+
[್�][�] -> [್ಚ] 902
|
904 |
+
[ �][�] -> [ ಡ] 903
|
905 |
+
[ಲ][್] -> [ಲ್] 904
|
906 |
+
[ h][ow] -> [ how] 905
|
907 |
+
[ s][p] -> [ sp] 906
|
908 |
+
[e][w] -> [ew] 907
|
909 |
+
[en][s] -> [ens] 908
|
910 |
+
[u][b] -> [ub] 909
|
911 |
+
[ n][o] -> [ no] 910
|
912 |
+
[ be][en] -> [ been] 911
|
913 |
+
[ t][w] -> [ tw] 912
|
914 |
+
[ ][K] -> [ K] 913
|
915 |
+
[ k][now] -> [ know] 914
|
916 |
+
[it][ion] -> [ition] 915
|
917 |
+
[ इ][स] -> [ इस] 916
|
918 |
+
[ be][c] -> [ bec] 917
|
919 |
+
[)][.] -> [).] 918
|
920 |
+
[ಂ][ಬ] -> [ಂಬ] 919
|
921 |
+
[ tim][e] -> [ time] 920
|
922 |
+
[ y][ear] -> [ year] 921
|
923 |
+
[ा�][�] -> [ां] 922
|
924 |
+
[ ][4] -> [ 4] 923
|
925 |
+
[k][ing] -> [king] 924
|
926 |
+
[or][y] -> [ory] 925
|
927 |
+
[e][p] -> [ep] 926
|
928 |
+
[ ][if] -> [ if] 927
|
929 |
+
[ o][ver] -> [ over] 928
|
930 |
+
[ा�][�] -> [ाइ] 929
|
931 |
+
[or][d] -> [ord] 930
|
932 |
+
[ सा�][�] -> [ साथ] 931
|
933 |
+
[ ][ra] -> [ ra] 932
|
934 |
+
[ಗ][ೆ] -> [ಗೆ] 933
|
935 |
+
[ ह][ो] -> [ हो] 934
|
936 |
+
[ in][f] -> [ inf] 935
|
937 |
+
[or][s] -> [ors] 936
|
938 |
+
[ र][ूप] -> [ रूप] 937
|
939 |
+
[ ][qu] -> [ qu] 938
|
940 |
+
[ ಮ][ಾಡ] -> [ ಮಾಡ] 939
|
941 |
+
[ f][ir] -> [ fir] 940
|
942 |
+
[ಾಯ][ಿತು] -> [ಾಯಿತು] 941
|
943 |
+
[ಾ�][�] -> [ಾಕ] 942
|
944 |
+
[ा�][�] -> [ाप] 943
|
945 |
+
[ d][iff] -> [ diff] 944
|
946 |
+
[st][em] -> [stem] 945
|
947 |
+
[i][an] -> [ian] 946
|
948 |
+
[ C][h] -> [ Ch] 947
|
949 |
+
[op][le] -> [ople] 948
|
950 |
+
[ �][�] -> [ ಈ] 949
|
951 |
+
[ a][g] -> [ ag] 950
|
952 |
+
[त][्र] -> [त्र] 951
|
953 |
+
[t][e] -> [te] 952
|
954 |
+
[ l][ike] -> [ like] 953
|
955 |
+
[w][n] -> [wn] 954
|
956 |
+
[p][le] -> [ple] 955
|
957 |
+
[ be][t] -> [ bet] 956
|
958 |
+
[ w][ould] -> [ would] 957
|
959 |
+
[ h][im] -> [ him] 958
|
960 |
+
[ा�][�] -> [ाज] 959
|
961 |
+
[on][d] -> [ond] 960
|
962 |
+
[n][ing] -> [ning] 961
|
963 |
+
[ a][pp] -> [ app] 962
|
964 |
+
[ m][ay] -> [ may] 963
|
965 |
+
[m][er] -> [mer] 964
|
966 |
+
[ೇ][ರ] -> [ೇರ] 965
|
967 |
+
[l][l] -> [ll] 966
|
968 |
+
[ m][y] -> [ my] 967
|
969 |
+
[rou][gh] -> [rough] 968
|
970 |
+
[ ][ro] -> [ ro] 969
|
971 |
+
[ ज][ो] -> [ जो] 970
|
972 |
+
[�][�] -> [ೌ] 971
|
973 |
+
[ic][k] -> [ick] 972
|
974 |
+
[f][ter] -> [fter] 973
|
975 |
+
[i][ous] -> [ious] 974
|
976 |
+
[ an][y] -> [ any] 975
|
977 |
+
[ದ][ೆ] -> [ದೆ] 976
|
978 |
+
[ड][़] -> [ड़] 977
|
979 |
+
[ wh][at] -> [ what] 978
|
980 |
+
[c][re] -> [cre] 979
|
981 |
+
[ಕ್�][�] -> [ಕ್ಷ] 980
|
982 |
+
[ �][�] -> [ ಟ] 981
|
983 |
+
[ अ][प] -> [ अप] 982
|
984 |
+
[ವ][ಾಗಿ] -> [ವಾಗಿ] 983
|
985 |
+
[ it][s] -> [ its] 984
|
986 |
+
[er][v] -> [erv] 985
|
987 |
+
[್ತ][ು] -> [್ತು] 986
|
988 |
+
[ु][र] -> [ुर] 987
|
989 |
+
[ಲ][ು] -> [ಲು] 988
|
990 |
+
[o][ve] -> [ove] 989
|
991 |
+
[ in][d] -> [ ind] 990
|
992 |
+
[ो][ग] -> [ोग] 991
|
993 |
+
[ o][ur] -> [ our] 992
|
994 |
+
[र][ी] -> [री] 993
|
995 |
+
[ u][se] -> [ use] 994
|
996 |
+
[ of][f] -> [ off] 995
|
997 |
+
[)][,] -> [),] 996
|
998 |
+
[on][s] -> [ons] 997
|
999 |
+
[ the][se] -> [ these] 998
|
1000 |
+
[ re][c] -> [ rec] 999
|
1001 |
+
[ ಅವ][ರು] -> [ ಅವರು] 1000
|
1002 |
+
[्�][�] -> [्ह] 1001
|
1003 |
+
[ing][s] -> [ings] 1002
|
1004 |
+
[ ][V] -> [ V] 1003
|
1005 |
+
[ pe][ople] -> [ people] 1004
|
1006 |
+
[re][n] -> [ren] 1005
|
1007 |
+
[ a][ct] -> [ act] 1006
|
1008 |
+
[�][�] -> [ಷ] 1007
|
1009 |
+
[ol][og] -> [olog] 1008
|
1010 |
+
[r][it] -> [rit] 1009
|
1011 |
+
[ u][nder] -> [ under] 1010
|
1012 |
+
[ಟ][್ಟ] -> [ಟ್ಟ] 1011
|
1013 |
+
[ A][s] -> [ As] 1012
|
1014 |
+
[ಾ�][�] -> [ಾಜ] 1013
|
1015 |
+
[p][h] -> [ph] 1014
|
1016 |
+
[w][e] -> [we] 1015
|
1017 |
+
[ d][es] -> [ des] 1016
|
1018 |
+
[ಿ�][�] -> [ಿಮ] 1017
|
1019 |
+
[ೇ][ಶ] -> [ೇಶ] 1018
|
1020 |
+
[ tw][o] -> [ two] 1019
|
1021 |
+
[ �][�] -> [ ಭ] 1020
|
1022 |
+
[ ][5] -> [ 5] 1021
|
1023 |
+
[ प्र][त] -> [ प्रत] 1022
|
1024 |
+
[al][ly] -> [ally] 1023
|
1025 |
+
[y][s] -> [ys] 1024
|
1026 |
+
[l][es] -> [les] 1025
|
1027 |
+
[se][l] -> [sel] 1026
|
1028 |
+
[ t][ra] -> [ tra] 1027
|
1029 |
+
[ೆ�][�] -> [ೆಸ] 1028
|
1030 |
+
[ he][l] -> [ hel] 1029
|
1031 |
+
[ा�][�] -> [ाय] 1030
|
1032 |
+
[is][s] -> [iss] 1031
|
1033 |
+
[j][ect] -> [ject] 1032
|
1034 |
+
[ sy][stem] -> [ system] 1033
|
1035 |
+
[ c][o] -> [ co] 1034
|
1036 |
+
[m][s] -> [ms] 1035
|
1037 |
+
[ा][र्�] -> [ार्�] 1036
|
1038 |
+
[ ne][w] -> [ new] 1037
|
1039 |
+
[ಂ][ಟ] -> [ಂಟ] 1038
|
1040 |
+
[le][ct] -> [lect] 1039
|
1041 |
+
[ic][t] -> [ict] 1040
|
1042 |
+
[ H][e] -> [ He] 1041
|
1043 |
+
[क][्ष] -> [क्ष] 1042
|
1044 |
+
[ re][l] -> [ rel] 1043
|
1045 |
+
[ hel][p] -> [ help] 1044
|
1046 |
+
[it][ies] -> [ities] 1045
|
1047 |
+
[au][se] -> [ause] 1046
|
1048 |
+
[्�][�] -> [्क] 1047
|
1049 |
+
[ar][k] -> [ark] 1048
|
1050 |
+
[at][es] -> [ates] 1049
|
1051 |
+
[ on][ly] -> [ only] 1050
|
1052 |
+
[ೊ][ಂಡ] -> [ೊಂಡ] 1051
|
1053 |
+
[ೆ][ಲ] -> [ೆಲ] 1052
|
1054 |
+
[ver][s] -> [vers] 1053
|
1055 |
+
[ m][ost] -> [ most] 1054
|
1056 |
+
[�][�] -> [भ] 1055
|
1057 |
+
[�][�] -> [ई] 1056
|
1058 |
+
[is][e] -> [ise] 1057
|
1059 |
+
[cl][ud] -> [clud] 1058
|
1060 |
+
[ ch][ild] -> [ child] 1059
|
1061 |
+
[ी][य] -> [ीय] 1060
|
1062 |
+
[ fir][st] -> [ first] 1061
|
1063 |
+
[ con][s] -> [ cons] 1062
|
1064 |
+
[ं�][�] -> [ंड] 1063
|
1065 |
+
[ t][r] -> [ tr] 1064
|
1066 |
+
[ W][h] -> [ Wh] 1065
|
1067 |
+
[ಂತ][ರ] -> [ಂತರ] 1066
|
1068 |
+
[ sh][ould] -> [ should] 1067
|
1069 |
+
[ul][ar] -> [ular] 1068
|
1070 |
+
[ ನ][ಿರ] -> [ ನಿರ] 1069
|
1071 |
+
[ಸ][್] -> [ಸ್] 1070
|
1072 |
+
[ ವ][ಿ�] -> [ ವಿ�] 1071
|
1073 |
+
[oo][k] -> [ook] 1072
|
1074 |
+
[f][ul] -> [ful] 1073
|
1075 |
+
[ುತ್ತ][ದೆ] -> [ುತ್ತದೆ] 1074
|
1076 |
+
[ವ][ಾಗ] -> [ವಾಗ] 1075
|
1077 |
+
[al][th] -> [alth] 1076
|
1078 |
+
[ man][y] -> [ many] 1077
|
1079 |
+
[ಕ್ಕ][ೆ] -> [ಕ್ಕೆ] 1078
|
1080 |
+
[ अ][न] -> [ अन] 1079
|
1081 |
+
[ा�][�] -> [ाव] 1080
|
1082 |
+
[್�][�] -> [್ಬ] 1081
|
1083 |
+
[ b][l] -> [ bl] 1082
|
1084 |
+
[�][�] -> [ಖ] 1083
|
1085 |
+
[|][|] -> [||] 1084
|
1086 |
+
[ p][o] -> [ po] 1085
|
1087 |
+
[ರ][ಣ] -> [ರಣ] 1086
|
1088 |
+
[�][�] -> [ೃ] 1087
|
1089 |
+
[ a][m] -> [ am] 1088
|
1090 |
+
[i][et] -> [iet] 1089
|
1091 |
+
[ಾ�][�] -> [ಾತ] 1090
|
1092 |
+
[ter][n] -> [tern] 1091
|
1093 |
+
[he][d] -> [hed] 1092
|
1094 |
+
[ प][ह] -> [ पह] 1093
|
1095 |
+
[ल][े] -> [ले] 1094
|
1096 |
+
[ c][ol] -> [ col] 1095
|
1097 |
+
[क][े] -> [के] 1096
|
1098 |
+
[ ][6] -> [ 6] 1097
|
1099 |
+
[�][�] -> [ए] 1098
|
1100 |
+
[ diff][ere] -> [ differe] 1099
|
1101 |
+
[ f][l] -> [ fl] 1100
|
1102 |
+
[1][9] -> [19] 1101
|
1103 |
+
[त][े] -> [ते] 1102
|
1104 |
+
[ m][od] -> [ mod] 1103
|
1105 |
+
[ ज][िस] -> [ जिस] 1104
|
1106 |
+
[ ಅವ][ರ] -> [ ಅವರ] 1105
|
1107 |
+
[ಾ�][�] -> [ಾಪ] 1106
|
1108 |
+
[c][ed] -> [ced] 1107
|
1109 |
+
[ spe][c] -> [ spec] 1108
|
1110 |
+
[ c][ould] -> [ could] 1109
|
1111 |
+
[if][e] -> [ife] 1110
|
1112 |
+
[್ಯ][ೂ] -> [್ಯೂ] 1111
|
1113 |
+
[c][ess] -> [cess] 1112
|
1114 |
+
[x][t] -> [xt] 1113
|
1115 |
+
[vel][op] -> [velop] 1114
|
1116 |
+
[ wor][k] -> [ work] 1115
|
1117 |
+
[�][�] -> [ृ] 1116
|
1118 |
+
[ कर][ने] -> [ करने] 1117
|
1119 |
+
[at][ing] -> [ating] 1118
|
1120 |
+
[ su][ch] -> [ such] 1119
|
1121 |
+
[ द][्व] -> [ द्व] 1120
|
1122 |
+
[if][ic] -> [ific] 1121
|
1123 |
+
[ೆಯ][ನ್ನು] -> [ೆಯನ್ನು] 1122
|
1124 |
+
[ಿತ][್ತು] -> [ಿತ್ತು] 1123
|
1125 |
+
[ f][e] -> [ fe] 1124
|
1126 |
+
[ार][ा] -> [ारा] 1125
|
1127 |
+
[ ][ ] -> [ ] 1126
|
1128 |
+
[ o][b] -> [ ob] 1127
|
1129 |
+
[f][ore] -> [fore] 1128
|
1130 |
+
[ �][�] -> [ ಫ] 1129
|
1131 |
+
[ उन][्ह] -> [ उन्ह] 1130
|
1132 |
+
[न][ी] -> [नी] 1131
|
1133 |
+
[re][d] -> [red] 1132
|
1134 |
+
[ff][ect] -> [ffect] 1133
|
1135 |
+
[at][ive] -> [ative] 1134
|
1136 |
+
[ೊ][ಳ] -> [ೊಳ] 1135
|
1137 |
+
[ थ][े] -> [ थे] 1136
|
1138 |
+
[ a][cc] -> [ acc] 1137
|
1139 |
+
[ p][r] -> [ pr] 1138
|
1140 |
+
[ th][rough] -> [ through] 1139
|
1141 |
+
[ w][ell] -> [ well] 1140
|
1142 |
+
[ಿರ][ುವ] -> [ಿರುವ] 1141
|
1143 |
+
[े�][�] -> [ेश] 1142
|
1144 |
+
[ s][ur] -> [ sur] 1143
|
1145 |
+
[i][ed] -> [ied] 1144
|
1146 |
+
[ g][et] -> [ get] 1145
|
1147 |
+
[ स्�][�] -> [ स्थ] 1146
|
1148 |
+
[he][n] -> [hen] 1147
|
1149 |
+
[o][v] -> [ov] 1148
|
1150 |
+
[er][t] -> [ert] 1149
|
1151 |
+
[ pro][du] -> [ produ] 1150
|
1152 |
+
[ a][fter] -> [ after] 1151
|
1153 |
+
[ sa][id] -> [ said] 1152
|
1154 |
+
[ pro][v] -> [ prov] 1153
|
1155 |
+
[ in][clud] -> [ includ] 1154
|
1156 |
+
[�][�] -> [ಫ] 1155
|
1157 |
+
[्�][�] -> [्प] 1156
|
1158 |
+
[ation][al] -> [ational] 1157
|
1159 |
+
[ure][s] -> [ures] 1158
|
1160 |
+
[os][s] -> [oss] 1159
|
1161 |
+
[स][े] -> [से] 1160
|
1162 |
+
[ p][h] -> [ ph] 1161
|
1163 |
+
[ थ][ी] -> [ थी] 1162
|
1164 |
+
[ w][ater] -> [ water] 1163
|
1165 |
+
[ the][n] -> [ then] 1164
|
1166 |
+
[ad][e] -> [ade] 1165
|
1167 |
+
[ನ][ು] -> [ನು] 1166
|
1168 |
+
[ re][m] -> [ rem] 1167
|
1169 |
+
[u][c] -> [uc] 1168
|
1170 |
+
[ow][n] -> [own] 1169
|
1171 |
+
[ur][n] -> [urn] 1170
|
1172 |
+
[್�][�] -> [್ಡ] 1171
|
1173 |
+
[ೇ][ಲ] -> [ೇಲ] 1172
|
1174 |
+
[t][ing] -> [ting] 1173
|
1175 |
+
[ot][h] -> [oth] 1174
|
1176 |
+
[b][le] -> [ble] 1175
|
1177 |
+
[ us][ed] -> [ used] 1176
|
1178 |
+
[at][h] -> [ath] 1177
|
1179 |
+
[್ಗ][ಳ] -> [್ಗಳ] 1178
|
1180 |
+
[ा�][�] -> [ाई] 1179
|
1181 |
+
[al][s] -> [als] 1180
|
1182 |
+
[and][s] -> [ands] 1181
|
1183 |
+
[ as][s] -> [ ass] 1182
|
1184 |
+
[ar][n] -> [arn] 1183
|
1185 |
+
[rou][nd] -> [round] 1184
|
1186 |
+
[ ಎ][ಂದು] -> [ ಎಂದು] 1185
|
1187 |
+
[l][ic] -> [lic] 1186
|
1188 |
+
[ R][e] -> [ Re] 1187
|
1189 |
+
[ ज][ात] -> [ जात] 1188
|
1190 |
+
[�][�] -> [उ] 1189
|
1191 |
+
[ re][g] -> [ reg] 1190
|
1192 |
+
[i][ent] -> [ient] 1191
|
1193 |
+
[in][k] -> [ink] 1192
|
1194 |
+
[ year][s] -> [ years] 1193
|
1195 |
+
[c][y] -> [cy] 1194
|
1196 |
+
[we][en] -> [ween] 1195
|
1197 |
+
[t][le] -> [tle] 1196
|
1198 |
+
[ de][velop] -> [ develop] 1197
|
1199 |
+
[ಿಸಿದ][ರು] -> [ಿಸಿದರು] 1198
|
1200 |
+
[ar][s] -> [ars] 1199
|
1201 |
+
[ ನ][ಂತರ] -> [ ನಂತರ] 1200
|
1202 |
+
[in][es] -> [ines] 1201
|
1203 |
+
[ಿಸ][ುವ] -> [ಿಸುವ] 1202
|
1204 |
+
[ U][n] -> [ Un] 1203
|
1205 |
+
[ ಕ][ಾರ] -> [ ಕಾರ] 1204
|
1206 |
+
[ೆ�][�] -> [ೆಚ] 1205
|
1207 |
+
[ि�][�] -> [िं] 1206
|
1208 |
+
[ द्व][ारा] -> [ द्वारा] 1207
|
1209 |
+
[ment][s] -> [ments] 1208
|
1210 |
+
[ಾ�][�] -> [ಾಣ] 1209
|
1211 |
+
[e][nd] -> [end] 1210
|
1212 |
+
[ri][b] -> [rib] 1211
|
1213 |
+
[ic][s] -> [ics] 1212
|
1214 |
+
[ s][m] -> [ sm] 1213
|
1215 |
+
[ su][b] -> [ sub] 1214
|
1216 |
+
[्�][�] -> [्म] 1215
|
1217 |
+
[ ಸ][ಮ] -> [ ಸಮ] 1216
|
1218 |
+
[ ne][ed] -> [ need] 1217
|
1219 |
+
[ भ][ी] -> [ भी] 1218
|
1220 |
+
[he][s] -> [hes] 1219
|
1221 |
+
[n][e] -> [ne] 1220
|
1222 |
+
[g][an] -> [gan] 1221
|
1223 |
+
[ su][pp] -> [ supp] 1222
|
1224 |
+
[u][ch] -> [uch] 1223
|
1225 |
+
[ur][ing] -> [uring] 1224
|
1226 |
+
[ವ][ೆ] -> [ವೆ] 1225
|
1227 |
+
[ र][ाज] -> [ राज] 1226
|
1228 |
+
[o][b] -> [ob] 1227
|
1229 |
+
[ ಇದ][ು] -> [ ಇದು] 1228
|
1230 |
+
[ೆಚ][್ಚ] -> [ೆಚ್ಚ] 1229
|
1231 |
+
[ ಸ][್ಥ] -> [ ಸ್ಥ] 1230
|
1232 |
+
[ whe][re] -> [ where] 1231
|
1233 |
+
[ im][p] -> [ imp] 1232
|
1234 |
+
[ar][ch] -> [arch] 1233
|
1235 |
+
[್�][�] -> [್ಧ] 1234
|
1236 |
+
[ು][ರ] -> [ುರ] 1235
|
1237 |
+
[ere][d] -> [ered] 1236
|
1238 |
+
[ a][c] -> [ ac] 1237
|
1239 |
+
[ व][र्�] -> [ वर्�] 1238
|
1240 |
+
[िय][ों] -> [ियों] 1239
|
1241 |
+
[ot][her] -> [other] 1240
|
1242 |
+
[ �][�] -> [ –] 1241
|
1243 |
+
[ bet][ween] -> [ between] 1242
|
1244 |
+
[an][ge] -> [ange] 1243
|
1245 |
+
[t][en] -> [ten] 1244
|
1246 |
+
[ at][t] -> [ att] 1245
|
1247 |
+
[್ಸ][್] -> [್ಸ್] 1246
|
1248 |
+
[ ಮ][ೂ] -> [ ಮೂ] 1247
|
1249 |
+
[ो][ल] -> [ोल] 1248
|
1250 |
+
[ स][ी] -> [ सी] 1249
|
1251 |
+
[ib][le] -> [ible] 1250
|
1252 |
+
[ೊಂದ][ಿಗೆ] -> [ೊಂದಿಗೆ] 1251
|
1253 |
+
[er][m] -> [erm] 1252
|
1254 |
+
[ಗಳ][ಲ್ಲಿ] -> [ಗಳಲ್ಲಿ] 1253
|
1255 |
+
[ in][v] -> [ inv] 1254
|
1256 |
+
[ 19][9] -> [ 199] 1255
|
1257 |
+
[ m][ake] -> [ make] 1256
|
1258 |
+
[2][0] -> [20] 1257
|
1259 |
+
[ अ][ध] -> [ अध] 1258
|
1260 |
+
[ य][ह] -> [ यह] 1259
|
1261 |
+
[ೆ][ರ] -> [ೆರ] 1260
|
1262 |
+
[ क][ि] -> [ कि] 1261
|
1263 |
+
[ ಮ][ು] -> [ ಮು] 1262
|
1264 |
+
[r][ic] -> [ric] 1263
|
1265 |
+
[ p][res] -> [ pres] 1264
|
1266 |
+
[ w][ay] -> [ way] 1265
|
1267 |
+
[et][h] -> [eth] 1266
|
1268 |
+
[ W][e] -> [ We] 1267
|
1269 |
+
[ ನ][ೀ] -> [ ನೀ] 1268
|
1270 |
+
[n][g] -> [ng] 1269
|
1271 |
+
[े][क] -> [ेक] 1270
|
1272 |
+
[ l][ong] -> [ long] 1271
|
1273 |
+
[ि�][�] -> [िम] 1272
|
1274 |
+
[े][न] -> [ेन] 1273
|
1275 |
+
[ in][ter] -> [ inter] 1274
|
1276 |
+
[ ad][d] -> [ add] 1275
|
1277 |
+
[ ][8] -> [ 8] 1276
|
1278 |
+
[ ज][ी] -> [ जी] 1277
|
1279 |
+
[ ex][am] -> [ exam] 1278
|
1280 |
+
[il][ity] -> [ility] 1279
|
1281 |
+
[p][s] -> [ps] 1280
|
1282 |
+
[ ह][ु�] -> [ हु�] 1281
|
1283 |
+
[it][s] -> [its] 1282
|
1284 |
+
[ಿಯ][ನ್] -> [ಿಯನ್] 1283
|
1285 |
+
[ro][w] -> [row] 1284
|
1286 |
+
[ l][ife] -> [ life] 1285
|
1287 |
+
[pe][ct] -> [pect] 1286
|
1288 |
+
[ for][m] -> [ form] 1287
|
1289 |
+
[ ][9] -> [ 9] 1288
|
1290 |
+
[ಿಲ][್ಲ] -> [ಿಲ್ಲ] 1289
|
1291 |
+
[ow][er] -> [ower] 1290
|
1292 |
+
[े�][�] -> [ेट] 1291
|
1293 |
+
[ f][ound] -> [ found] 1292
|
1294 |
+
[ उ][प] -> [ उप] 1293
|
1295 |
+
[as][h] -> [ash] 1294
|
1296 |
+
[ I][f] -> [ If] 1295
|
1297 |
+
[ु][ल] -> [ुल] 1296
|
1298 |
+
[�][�] -> [–] 1297
|
1299 |
+
[ gen][er] -> [ gener] 1298
|
1300 |
+
[ಂ][ಗ] -> [ಂಗ] 1299
|
1301 |
+
[ b][u] -> [ bu] 1300
|
1302 |
+
[ೆ][ಗೆ] -> [ೆಗೆ] 1301
|
1303 |
+
[o][y] -> [oy] 1302
|
1304 |
+
[as][ed] -> [ased] 1303
|
1305 |
+
[ in][st] -> [ inst] 1304
|
1306 |
+
[ j][ust] -> [ just] 1305
|
1307 |
+
[ ಪ][ರ] -> [ ಪರ] 1306
|
1308 |
+
[ ev][en] -> [ even] 1307
|
1309 |
+
[ differe][nt] -> [ different] 1308
|
1310 |
+
[ಿ�][�] -> [ಿಡ] 1309
|
1311 |
+
[ se][e] -> [ see] 1310
|
1312 |
+
[ ][7] -> [ 7] 1311
|
1313 |
+
[ द][िया] -> [ दिया] 1312
|
1314 |
+
[ ][ed] -> [ ed] 1313
|
1315 |
+
[ ख][ेल] -> [ खेल] 1314
|
1316 |
+
[ p][ers] -> [ pers] 1315
|
1317 |
+
[ H][ow] -> [ How] 1316
|
1318 |
+
[ c][all] -> [ call] 1317
|
1319 |
+
[in][s] -> [ins] 1318
|
1320 |
+
[�][�] -> [ಥ] 1319
|
1321 |
+
[ ಮ][ೊ] -> [ ಮೊ] 1320
|
1322 |
+
[e][ver] -> [ever] 1321
|
1323 |
+
[ों][ने] -> [ोंने] 1322
|
1324 |
+
[ o][wn] -> [ own] 1323
|
1325 |
+
[ ब][ाद] -> [ बाद] 1324
|
1326 |
+
[ th][ose] -> [ those] 1325
|
1327 |
+
[ e][m] -> [ em] 1326
|
1328 |
+
[ ಹ][ೆಚ್ಚ] -> [ ಹೆಚ್ಚ] 1327
|
1329 |
+
[ra][m] -> [ram] 1328
|
1330 |
+
[ch][n] -> [chn] 1329
|
1331 |
+
[ುದ][ು] -> [ುದು] 1330
|
1332 |
+
[ ಆ][ದ] -> [ ಆದ] 1331
|
1333 |
+
[ि�][�] -> [��व] 1332
|
1334 |
+
[sel][f] -> [self] 1333
|
1335 |
+
[mer][ic] -> [meric] 1334
|
1336 |
+
[ ][very] -> [ very] 1335
|
1337 |
+
[f][orm] -> [form] 1336
|
1338 |
+
[f][t] -> [ft] 1337
|
1339 |
+
[od][y] -> [ody] 1338
|
1340 |
+
[ n][um] -> [ num] 1339
|
1341 |
+
[ t][y] -> [ ty] 1340
|
1342 |
+
[उ][प] -> [उप] 1341
|
1343 |
+
[ೀ][ಯ] -> [ೀಯ] 1342
|
1344 |
+
[ tr][ans] -> [ trans] 1343
|
1345 |
+
[or][n] -> [orn] 1344
|
1346 |
+
[ಡ][್] -> [ಡ್] 1345
|
1347 |
+
[an][y] -> [any] 1346
|
1348 |
+
[ 1][0] -> [ 10] 1347
|
1349 |
+
[g][et] -> [get] 1348
|
1350 |
+
[ar][ly] -> [arly] 1349
|
1351 |
+
[o][ol] -> [ool] 1350
|
1352 |
+
[ ಹ][ೊಂದ] -> [ ಹೊಂದ] 1351
|
1353 |
+
[ant][s] -> [ants] 1352
|
1354 |
+
[್ನ][ಲ್ಲಿ] -> [್ನಲ್ಲಿ] 1353
|
1355 |
+
[ं][त] -> [ंत] 1354
|
1356 |
+
[ त][क] -> [ तक] 1355
|
1357 |
+
[a][w] -> [aw] 1356
|
1358 |
+
[ he][alth] -> [ health] 1357
|
1359 |
+
[r][on] -> [ron] 1358
|
1360 |
+
[ಿಸಲ][ು] -> [ಿಸಲು] 1359
|
1361 |
+
[ C][om] -> [ Com] 1360
|
1362 |
+
[ak][es] -> [akes] 1361
|
1363 |
+
[क][ार] -> [कार] 1362
|
1364 |
+
[ak][ing] -> [aking] 1363
|
1365 |
+
[ be][ing] -> [ being] 1364
|
1366 |
+
[ा�][�] -> [ाक] 1365
|
1367 |
+
[ in][cre] -> [ incre] 1366
|
1368 |
+
[र][ण] -> [रण] 1367
|
1369 |
+
[il][y] -> [ily] 1368
|
1370 |
+
[ e][very] -> [ every] 1369
|
1371 |
+
[ू][ल] -> [ूल] 1370
|
1372 |
+
[ ಹ][ೊ] -> [ ಹೊ] 1371
|
1373 |
+
[ಲ][್ಗ] -> [ಲ್ಗ] 1372
|
1374 |
+
[ ವ][್ಯ] -> [ ವ್ಯ] 1373
|
1375 |
+
[ಾ�][�] -> [ಾಟ] 1374
|
1376 |
+
[ to][o] -> [ too] 1375
|
1377 |
+
[t][y] -> [ty] 1376
|
1378 |
+
[�][्ट] -> [�्ट] 1377
|
1379 |
+
[ा�][�] -> [ाह] 1378
|
1380 |
+
[iv][es] -> [ives] 1379
|
1381 |
+
[1][0] -> [10] 1380
|
1382 |
+
[ra][ct] -> [ract] 1381
|
1383 |
+
[ical][ly] -> [ically] 1382
|
1384 |
+
[್ಯ][ಾಂ] -> [್ಯಾಂ] 1383
|
1385 |
+
[ d][et] -> [ det] 1384
|
1386 |
+
[ P][ro] -> [ Pro] 1385
|
1387 |
+
[ h][um] -> [ hum] 1386
|
1388 |
+
[t][s] -> [ts] 1387
|
1389 |
+
[ h][igh] -> [ high] 1388
|
1390 |
+
[ ಸ][ಹ] -> [ ಸಹ] 1389
|
1391 |
+
[ ಅ][ನ್ನು] -> [ ಅನ್ನು] 1390
|
1392 |
+
[c][k] -> [ck] 1391
|
1393 |
+
[ ][ent] -> [ ent] 1392
|
1394 |
+
[ The][re] -> [ There] 1393
|
1395 |
+
[ಾ�][�] -> [ಾಹ] 1394
|
1396 |
+
[ಸ][್ತ] -> [ಸ್ತ] 1395
|
1397 |
+
[ A][meric] -> [ Americ] 1396
|
1398 |
+
[ l][ar] -> [ lar] 1397
|
1399 |
+
[ be][fore] -> [ before] 1398
|
1400 |
+
[ವ][ಾದ] -> [ವಾದ] 1399
|
1401 |
+
[00][0] -> [000] 1400
|
1402 |
+
[ह][ी] -> [ही] 1401
|
1403 |
+
[ ex][p] -> [ exp] 1402
|
1404 |
+
[ B][ut] -> [ But] 1403
|
1405 |
+
[क][्त] -> [क्त] 1404
|
1406 |
+
[ e][ach] -> [ each] 1405
|
1407 |
+
[ ex][per] -> [ exper] 1406
|
1408 |
+
[oc][k] -> [ock] 1407
|
1409 |
+
[ ಅ][ದ] -> [ ಅದ] 1408
|
1410 |
+
[i][ence] -> [ience] 1409
|
1411 |
+
[ m][uch] -> [ much] 1410
|
1412 |
+
[ist][s] -> [ists] 1411
|
1413 |
+
[ व][ाल] -> [ वाल] 1412
|
1414 |
+
[id][ent] -> [ident] 1413
|
1415 |
+
[ಿ�][�] -> [ಿಟ] 1414
|
1416 |
+
[n][ess] -> [ness] 1415
|
1417 |
+
[ c][ount] -> [ count] 1416
|
1418 |
+
[ s][im] -> [ sim] 1417
|
1419 |
+
[ ][ke] -> [ ke] 1418
|
1420 |
+
[ S][e] -> [ Se] 1419
|
1421 |
+
[ar][m] -> [arm] 1420
|
1422 |
+
[ ಜ][ನ] -> [ ಜನ] 1421
|
1423 |
+
[ा�][�] -> [ाग] 1422
|
1424 |
+
[ut][ion] -> [ution] 1423
|
1425 |
+
[ಗ][್] -> [ಗ್] 1424
|
1426 |
+
[ि][र्�] -> [िर्�] 1425
|
1427 |
+
[ f][ol] -> [ fol] 1426
|
1428 |
+
[ e][nd] -> [ end] 1427
|
1429 |
+
[er][g] -> [erg] 1428
|
1430 |
+
[ಡ][ೆ] -> [ಡೆ] 1429
|
1431 |
+
[ ][|] -> [ |] 1430
|
1432 |
+
[ im][port] -> [ import] 1431
|
1433 |
+
[ de][c] -> [ dec] 1432
|
1434 |
+
[ child][ren] -> [ children] 1433
|
1435 |
+
[ f][ind] -> [ find] 1434
|
1436 |
+
[ re][f] -> [ ref] 1435
|
1437 |
+
[ wor][ld] -> [ world] 1436
|
1438 |
+
[ The][y] -> [ They] 1437
|
1439 |
+
[ನ][ೆಯ] -> [ನೆಯ] 1438
|
1440 |
+
[ೆ�][�] -> [ೆಟ] 1439
|
1441 |
+
[ज][़] -> [ज़] 1440
|
1442 |
+
[ re][ad] -> [ read] 1441
|
1443 |
+
[ m][ade] -> [ made] 1442
|
1444 |
+
[ದ][ಲ] -> [ದಲ] 1443
|
1445 |
+
[ राज][्य] -> [ राज्य] 1444
|
1446 |
+
[ do][es] -> [ does] 1445
|
1447 |
+
[ual][ly] -> [ually] 1446
|
1448 |
+
[ s][oc] -> [ soc] 1447
|
1449 |
+
[olog][y] -> [ology] 1448
|
1450 |
+
[ A][l] -> [ Al] 1449
|
1451 |
+
[ n][ow] -> [ now] 1450
|
1452 |
+
[ಿದ][ರು] -> [ಿದರು] 1451
|
1453 |
+
[े][र] -> [ेर] 1452
|
1454 |
+
[ं�][�] -> [ंद] 1453
|
1455 |
+
[ go][od] -> [ good] 1454
|
1456 |
+
[ जात][ा] -> [ जाता] 1455
|
1457 |
+
[ಾನ][್] -> [ಾನ್] 1456
|
1458 |
+
[ s][k] -> [ sk] 1457
|
1459 |
+
[ d][own] -> [ down] 1458
|
1460 |
+
[er][n] -> [ern] 1459
|
1461 |
+
[ e][ffect] -> [ effect] 1460
|
1462 |
+
[at][a] -> [ata] 1461
|
1463 |
+
[ೇ][ಕ] -> [ೇಕ] 1462
|
1464 |
+
[ उन्ह][ोंने] -> [ उन्होंने] 1463
|
1465 |
+
[त][ी] -> [ती] 1464
|
1466 |
+
[ c][re] -> [ cre] 1465
|
1467 |
+
[en][e] -> [ene] 1466
|
1468 |
+
[ೋ][ಜ] -> [ೋಜ] 1467
|
1469 |
+
[ू][र्�] -> [ूर्�] 1468
|
1470 |
+
[ಿ�][�] -> [ಿಷ] 1469
|
1471 |
+
[at][ure] -> [ature] 1470
|
1472 |
+
[l][ed] -> [led] 1471
|
1473 |
+
[ರ][ಡ] -> [ರಡ] 1472
|
1474 |
+
[v][ent] -> [vent] 1473
|
1475 |
+
[ur][al] -> [ural] 1474
|
1476 |
+
[ಾರ][ೆ] -> [ಾರೆ] 1475
|
1477 |
+
[ re][t] -> [ ret] 1476
|
1478 |
+
[म][ें] -> [में] 1477
|
1479 |
+
[ಕ][್ರ] -> [ಕ್ರ] 1478
|
1480 |
+
[ ವ][ರ್�] -> [ ವರ್�] 1479
|
1481 |
+
[ल][ी] -> [ली] 1480
|
1482 |
+
[ि�][�] -> [िश] 1481
|
1483 |
+
[ t][ake] -> [ take] 1482
|
1484 |
+
[ st][r] -> [ str] 1483
|
1485 |
+
[ b][ack] -> [ back] 1484
|
1486 |
+
[ F][or] -> [ For] 1485
|
1487 |
+
[ಾಗ][ಿದ್ದ] -> [ಾಗಿದ್ದ] 1486
|
1488 |
+
[ C][l] -> [ Cl] 1487
|
1489 |
+
[ಿಯ][ಲ್ಲಿ] -> [ಿಯಲ್ಲಿ] 1488
|
1490 |
+
[ t][ri] -> [ tri] 1489
|
1491 |
+
[ಿ�][�] -> [ಿಪ] 1490
|
1492 |
+
[ಸ][್ಥ] -> [ಸ್ಥ] 1491
|
1493 |
+
[ स्�][�] -> [ स्ट] 1492
|
1494 |
+
[ अध][िक] -> [ अधिक] 1493
|
1495 |
+
[ re][se] -> [ rese] 1494
|
1496 |
+
[ l][it] -> [ lit] 1495
|
1497 |
+
[ag][es] -> [ages] 1496
|
1498 |
+
[it][ed] -> [ited] 1497
|
1499 |
+
[ bec][ause] -> [ because] 1498
|
1500 |
+
[ ब][न] -> [ बन] 1499
|
1501 |
+
[ द][ो] -> [ दो] 1500
|
1502 |
+
[ c][ent] -> [ cent] 1501
|
1503 |
+
[ th][ough] -> [ though] 1502
|
1504 |
+
[ou][n] -> [oun] 1503
|
1505 |
+
[ ರ][ಾಜ] -> [ ರಾಜ] 1504
|
1506 |
+
[ ಮ][ೇಲ] -> [ ಮೇಲ] 1505
|
1507 |
+
[ v][ar] -> [ var] 1506
|
1508 |
+
[ fol][low] -> [ follow] 1507
|
1509 |
+
[ स्�][�] -> [ स्व] 1508
|
1510 |
+
[ c][le] -> [ cle] 1509
|
1511 |
+
[ m][on] -> [ mon] 1510
|
1512 |
+
[ ag][ain] -> [ again] 1511
|
1513 |
+
[ ಪ್ರ][ತ] -> [ ಪ್ರತ] 1512
|
1514 |
+
[ಿಸಲ][ಾಯಿತು] -> [ಿಸಲಾಯಿತು] 1513
|
1515 |
+
[े][स] -> [ेस] 1514
|
1516 |
+
[ल][ा] -> [ला] 1515
|
1517 |
+
[v][ed] -> [ved] 1516
|
1518 |
+
[v][es] -> [ves] 1517
|
1519 |
+
[ d][ay] -> [ day] 1518
|
1520 |
+
[ce][pt] -> [cept] 1519
|
1521 |
+
[ l][and] -> [ land] 1520
|
1522 |
+
[at][her] -> [ather] 1521
|
1523 |
+
[ ಮ][ಾರ] -> [ ಮಾರ] 1522
|
1524 |
+
[l][and] -> [land] 1523
|
1525 |
+
[ನ][ೆ] -> [ನೆ] 1524
|
1526 |
+
[ell][s] -> [ells] 1525
|
1527 |
+
[ inf][orm] -> [ inform] 1526
|
1528 |
+
[ m][ed] -> [ med] 1527
|
1529 |
+
[ु�][�] -> [ुद] 1528
|
1530 |
+
[ಂ][ಭ] -> [ಂಭ] 1529
|
1531 |
+
[ु][न] -> [ुन] 1530
|
1532 |
+
[in][ce] -> [ince] 1531
|
1533 |
+
[c][on] -> [con] 1532
|
1534 |
+
[ru][ct] -> [ruct] 1533
|
1535 |
+
[ಿಯ][ನ್ನು] -> [ಿಯನ್ನು] 1534
|
1536 |
+
[ि][र] -> [िर] 1535
|
1537 |
+
[ ಸ][ಾಮ] -> [ ಸಾಮ] 1536
|
1538 |
+
[ch][ool] -> [chool] 1537
|
1539 |
+
[ं�][�] -> [ंग] 1538
|
1540 |
+
[र्�][�] -> [र्य] 1539
|
1541 |
+
[w][ard] -> [ward] 1540
|
1542 |
+
[ te][chn] -> [ techn] 1541
|
1543 |
+
[ic][es] -> [ices] 1542
|
1544 |
+
[ न][िर्�] -> [ निर्�] 1543
|
1545 |
+
[ f][act] -> [ fact] 1544
|
1546 |
+
[ s][ame] -> [ same] 1545
|
1547 |
+
[ d][id] -> [ did] 1546
|
1548 |
+
[ೂ][ರ್�] -> [ೂರ್�] 1547
|
1549 |
+
[ cl][ass] -> [ class] 1548
|
1550 |
+
[क][ी] -> [की] 1549
|
1551 |
+
[ A][n] -> [ An] 1550
|
1552 |
+
[ೋ][ಗ] -> [ೋಗ] 1551
|
1553 |
+
[ಮ][್ಮ] -> [ಮ್ಮ] 1552
|
1554 |
+
[र्�][�] -> [र्ड] 1553
|
1555 |
+
[a][ir] -> [air] 1554
|
1556 |
+
[c][ial] -> [cial] 1555
|
1557 |
+
[ub][lic] -> [ublic] 1556
|
1558 |
+
[ra][ph] -> [raph] 1557
|
1559 |
+
[क][ि] -> [कि] 1558
|
1560 |
+
[ ಕ][್ರ] -> [ ಕ್ರ] 1559
|
1561 |
+
[a][j] -> [aj] 1560
|
1562 |
+
[ res][ult] -> [ result] 1561
|
1563 |
+
[िं][ग] -> [िंग] 1562
|
1564 |
+
[ಾಗ][ಿದೆ] -> [ಾಗಿದೆ] 1563
|
1565 |
+
[ pro][cess] -> [ process] 1564
|
1566 |
+
[ pers][on] -> [ person] 1565
|
1567 |
+
[i][vers] -> [ivers] 1566
|
1568 |
+
[ s][et] -> [ set] 1567
|
1569 |
+
[ b][oth] -> [ both] 1568
|
1570 |
+
[ in][s] -> [ ins] 1569
|
1571 |
+
[ು][ಗ] -> [ುಗ] 1570
|
1572 |
+
[ ex][pl] -> [ expl] 1571
|
1573 |
+
[a][h] -> [ah] 1572
|
1574 |
+
[iz][ed] -> [ized] 1573
|
1575 |
+
[ b][ody] -> [ body] 1574
|
1576 |
+
[स][्क] -> [स्क] 1575
|
1577 |
+
[ अप][ने] -> [ अपने] 1576
|
1578 |
+
[T][he] -> [The] 1577
|
1579 |
+
[d][s] -> [ds] 1578
|
1580 |
+
[ಕ][ರ] -> [ಕರ] 1579
|
1581 |
+
[ॉ][ल] -> [ॉल] 1580
|
1582 |
+
[d][er] -> [der] 1581
|
1583 |
+
[स][्त] -> [स्त] 1582
|
1584 |
+
[ W][hen] -> [ When] 1583
|
1585 |
+
[ s][erv] -> [ serv] 1584
|
1586 |
+
[w][ay] -> [way] 1585
|
1587 |
+
[ी][न] -> [ीन] 1586
|
1588 |
+
[ವ][ಾ] -> [ವಾ] 1587
|
1589 |
+
[ ಒ][ಂದು] -> [ ಒಂದು] 1588
|
1590 |
+
[ stud][ents] -> [ students] 1589
|
1591 |
+
[ ಬ][ಲ್ಗ] -> [ ಬಲ್ಗ] 1590
|
1592 |
+
[u][es] -> [ues] 1591
|
1593 |
+
[ th][ree] -> [ three] 1592
|
1594 |
+
[ �][�] -> [ ಏ] 1593
|
1595 |
+
[ ಆ][ಗ] -> [ ಆಗ] 1594
|
1596 |
+
[ro][s] -> [ros] 1595
|
1597 |
+
[ ब][ी] -> [ बी] 1596
|
1598 |
+
[ार][ी] -> [ारी] 1597
|
1599 |
+
[ stud][y] -> [ study] 1598
|
1600 |
+
[ dis][c] -> [ disc] 1599
|
1601 |
+
[ sm][all] -> [ small] 1600
|
1602 |
+
[ m][ight] -> [ might] 1601
|
1603 |
+
[ o][p] -> [ op] 1602
|
1604 |
+
[ re][p] -> [ rep] 1603
|
1605 |
+
[i][x] -> [ix] 1604
|
1606 |
+
[्य][ू] -> [्यू] 1605
|
1607 |
+
[ d][ist] -> [ dist] 1606
|
1608 |
+
[ c][ar] -> [ car] 1607
|
1609 |
+
[v][ing] -> [ving] 1608
|
1610 |
+
[್ಟ][್ರ] -> [್ಟ್ರ] 1609
|
1611 |
+
[ pro][g] -> [ prog] 1610
|
1612 |
+
[ा�][�] -> [ाँ] 1611
|
1613 |
+
[ p][at] -> [ pat] 1612
|
1614 |
+
[ import][ant] -> [ important] 1613
|
1615 |
+
[t][ed] -> [ted] 1614
|
1616 |
+
[ comm][un] -> [ commun] 1615
|
1617 |
+
[ ಎ][ರಡ] -> [ ಎರಡ] 1616
|
1618 |
+
[ಿಯ][ಾದ] -> [ಿಯಾದ] 1617
|
1619 |
+
[ वि�][�] -> [ विश] 1618
|
1620 |
+
[ �][�] -> [ घ] 1619
|
1621 |
+
[ S][he] -> [ She] 1620
|
1622 |
+
[ अ][ंत] -> [ अंत] 1621
|
1623 |
+
[ wh][ile] -> [ while] 1622
|
1624 |
+
[ O][n] -> [ On] 1623
|
1625 |
+
[ b][o] -> [ bo] 1624
|
1626 |
+
[ de][f] -> [ def] 1625
|
1627 |
+
[ g][rou] -> [ grou] 1626
|
1628 |
+
[�][ं] -> [�ं] 1627
|
1629 |
+
[ c][ap] -> [ cap] 1628
|
1630 |
+
[ re][qu] -> [ requ] 1629
|
1631 |
+
[ क][्ष] -> [ क्ष] 1630
|
1632 |
+
[ा�][�] -> [ाड] 1631
|
1633 |
+
[ c][ur] -> [ cur] 1632
|
1634 |
+
[ ಅ][ಧ] -> [ ಅಧ] 1633
|
1635 |
+
[ं][प] -> [ंप] 1634
|
1636 |
+
[ h][and] -> [ hand] 1635
|
1637 |
+
[ow][s] -> [ows] 1636
|
1638 |
+
[ E][th] -> [ Eth] 1637
|
1639 |
+
[st][and] -> [stand] 1638
|
1640 |
+
[ c][he] -> [ che] 1639
|
1641 |
+
[ 19][7] -> [ 197] 1640
|
1642 |
+
[ು][ಕ] -> [ುಕ] 1641
|
1643 |
+
[ en][g] -> [ eng] 1642
|
1644 |
+
[ro][ad] -> [road] 1643
|
1645 |
+
[ are][a] -> [ area] 1644
|
1646 |
+
[ವ][ರ] -> [ವರ] 1645
|
1647 |
+
[g][en] -> [gen] 1646
|
1648 |
+
[ f][low] -> [ flow] 1647
|
1649 |
+
[ st][art] -> [ start] 1648
|
1650 |
+
[ म][ै] -> [ मै] 1649
|
1651 |
+
[ान][े] -> [ाने] 1650
|
1652 |
+
[्�][�] -> [्न] 1651
|
1653 |
+
[ le][vel] -> [ level] 1652
|
1654 |
+
[ f][am] -> [ fam] 1653
|
1655 |
+
[ ಮೂ][ಲ] -> [ ಮೂಲ] 1654
|
1656 |
+
[ि�][�] -> [िट] 1655
|
1657 |
+
[ l][ook] -> [ look] 1656
|
1658 |
+
[ po][int] -> [ point] 1657
|
1659 |
+
[್�][�] -> [್ಳ] 1658
|
1660 |
+
[े�][�] -> [ेव] 1659
|
1661 |
+
[�][�] -> [ख] 1660
|
1662 |
+
[id][es] -> [ides] 1661
|
1663 |
+
[ ar][t] -> [ art] 1662
|
1664 |
+
[ comp][le] -> [ comple] 1663
|
1665 |
+
[i][o] -> [io] 1664
|
1666 |
+
[ ಉ][ದ] -> [ ಉದ] 1665
|
1667 |
+
[ m][ust] -> [ must] 1666
|
1668 |
+
[ us][ing] -> [ using] 1667
|
1669 |
+
[ d][uring] -> [ during] 1668
|
1670 |
+
[ie][w] -> [iew] 1669
|
1671 |
+
[d][ay] -> [day] 1670
|
1672 |
+
[ ನ][ಡ] -> [ ನಡ] 1671
|
1673 |
+
[ ch][ar] -> [ char] 1672
|
1674 |
+
[ part][ic] -> [ partic] 1673
|
1675 |
+
[ �][�] -> [ छ] 1674
|
1676 |
+
[ p][oss] -> [ poss] 1675
|
1677 |
+
[ p][ol] -> [ pol] 1676
|
1678 |
+
[ ಪ್ರ][ದ] -> [ ಪ್ರದ] 1677
|
1679 |
+
[್ಗ][ೆ] -> [್ಗೆ] 1678
|
1680 |
+
[ g][ra] -> [ gra] 1679
|
1681 |
+
[ s][ign] -> [ sign] 1680
|
1682 |
+
[ b][r] -> [ br] 1681
|
1683 |
+
[ 1][7] -> [ 17] 1682
|
1684 |
+
[v][en] -> [ven] 1683
|
1685 |
+
[ pl][ace] -> [ place] 1684
|
1686 |
+
[ v][al] -> [ val] 1685
|
1687 |
+
[ತ][ರ] -> [ತರ] 1686
|
1688 |
+
[ೆ][ನ] -> [ೆನ] 1687
|
1689 |
+
[ि�][�] -> [िप] 1688
|
1690 |
+
[ही][ं] -> [हीं] 1689
|
1691 |
+
[ಿಕ][್] -> [ಿಕ್] 1690
|
1692 |
+
[े][म] -> [ेम] 1691
|
1693 |
+
[ exam][ple] -> [ example] 1692
|
1694 |
+
[क][ा] -> [का] 1693
|
1695 |
+
[ उ][त] -> [ उत] 1694
|
1696 |
+
[ g][reat] -> [ great] 1695
|
1697 |
+
[य][ोग] -> [योग] 1696
|
1698 |
+
[c][ent] -> [cent] 1697
|
1699 |
+
[ ग�][�] -> [ गए] 1698
|
1700 |
+
[h][ip] -> [hip] 1699
|
1701 |
+
[ m][in] -> [ min] 1700
|
1702 |
+
[ ಅ][ಥ] -> [ ಅಥ] 1701
|
1703 |
+
[ಗ][ೊಂಡ] -> [ಗೊಂಡ] 1702
|
1704 |
+
[g][er] -> [ger] 1703
|
1705 |
+
[ि�][�] -> [िज] 1704
|
1706 |
+
[re][ad] -> [read] 1705
|
1707 |
+
[ain][s] -> [ains] 1706
|
1708 |
+
[iet][y] -> [iety] 1707
|
1709 |
+
[ ನೀ][ಡ] -> [ ನೀಡ] 1708
|
1710 |
+
[ौ][र] -> [ौर] 1709
|
1711 |
+
[ter][s] -> [ters] 1710
|
1712 |
+
[ or][gan] -> [ organ] 1711
|
1713 |
+
[ pl][an] -> [ plan] 1712
|
1714 |
+
[ f][un] -> [ fun] 1713
|
1715 |
+
[ व][्य] -> [ व्य] 1714
|
1716 |
+
[ाप][्त] -> [ाप्त] 1715
|
1717 |
+
[ು][ನ] -> [ುನ] 1716
|
1718 |
+
[ B][e] -> [ Be] 1717
|
1719 |
+
[ತ][್ಯ] -> [ತ್ಯ] 1718
|
1720 |
+
[ r][ight] -> [ right] 1719
|
1721 |
+
[iz][e] -> [ize] 1720
|
1722 |
+
[ an][other] -> [ another] 1721
|
1723 |
+
[ 19][8] -> [ 198] 1722
|
1724 |
+
[ e][lect] -> [ elect] 1723
|
1725 |
+
[ a][p] -> [ ap] 1724
|
1726 |
+
[ d][ata] -> [ data] 1725
|
1727 |
+
[ ][est] -> [ est] 1726
|
1728 |
+
[ं�][�] -> [ंट] 1727
|
1729 |
+
[ न][हीं] -> [ नहीं] 1728
|
1730 |
+
[ f][ree] -> [ free] 1729
|
1731 |
+
[क][्र] -> [क्र] 1730
|
1732 |
+
[ ಸ][ೇ] -> [ ಸೇ] 1731
|
1733 |
+
[g][g] -> [gg] 1732
|
1734 |
+
[ಿಸ][ುತ್ತ] -> [ಿಸುತ್ತ] 1733
|
1735 |
+
[ou][se] -> [ouse] 1734
|
1736 |
+
[in][ed] -> [ined] 1735
|
1737 |
+
[n][ot] -> [not] 1736
|
1738 |
+
[ut][e] -> [ute] 1737
|
1739 |
+
[्�][�] -> [्ध] 1738
|
1740 |
+
[re][en] -> [reen] 1739
|
1741 |
+
[ Y][ou] -> [ You] 1740
|
1742 |
+
[ app][ro] -> [ appro] 1741
|
1743 |
+
[ರ್�][�] -> [ರ್ಗ] 1742
|
1744 |
+
[ ಗ][ು] -> [ ಗು] 1743
|
1745 |
+
[ des][ign] -> [ design] 1744
|
1746 |
+
[ A][r] -> [ Ar] 1745
|
1747 |
+
[is][m] -> [ism] 1746
|
1748 |
+
[ೋ][ರ] -> [ೋರ] 1747
|
1749 |
+
[ s][om] -> [ som] 1748
|
1750 |
+
[ st][ill] -> [ still] 1749
|
1751 |
+
[ S][c] -> [ Sc] 1750
|
1752 |
+
[ो][र] -> [ोर] 1751
|
1753 |
+
[ con][f] -> [ conf] 1752
|
1754 |
+
[ D][e] -> [ De] 1753
|
1755 |
+
[े�][�] -> [ेज] 1754
|
1756 |
+
[ a][ut] -> [ aut] 1755
|
1757 |
+
[ar][th] -> [arth] 1756
|
1758 |
+
[ೆಯ][ು] -> [ೆಯು] 1757
|
1759 |
+
[ र][ह] -> [ रह] 1758
|
1760 |
+
[id][s] -> [ids] 1759
|
1761 |
+
[a][ve] -> [ave] 1760
|
1762 |
+
[ी][त] -> [ीत] 1761
|
1763 |
+
[iv][id] -> [ivid] 1762
|
1764 |
+
[ो][ट] -> [ोट] 1763
|
1765 |
+
[ान][ा] -> [ाना] 1764
|
1766 |
+
[ ತ][ನ್ನ] -> [ ತನ್ನ] 1765
|
1767 |
+
[ಾ�][�] -> [ಾಷ] 1766
|
1768 |
+
[ ಪ][ೂರ್�] -> [ ಪೂರ್�] 1767
|
1769 |
+
[ ಕ][ೊ] -> [ ಕೊ] 1768
|
1770 |
+
[ ಬಲ್ಗ][ೇರ] -> [ ಬಲ್ಗೇರ] 1769
|
1771 |
+
[ ex][t] -> [ ext] 1770
|
1772 |
+
[ of][ten] -> [ often] 1771
|
1773 |
+
[ा�][�ा] -> [ाया] 1772
|
1774 |
+
[ ಗ][್ರ] -> [ ಗ್ರ] 1773
|
1775 |
+
[ m][ain] -> [ main] 1774
|
1776 |
+
[ 1][2] -> [ 12] 1775
|
1777 |
+
[ य][ू] -> [ यू] 1776
|
1778 |
+
[ा�][�ं] -> [ाओं] 1777
|
1779 |
+
[ con][st] -> [ const] 1778
|
1780 |
+
[ l][oc] -> [ loc] 1779
|
1781 |
+
[ ಅಥ][ವಾ] -> [ ಅಥವಾ] 1780
|
1782 |
+
[ಿಸ][ಿತು] -> [ಿಸಿತು] 1781
|
1783 |
+
[ಪ][್ರ] -> [ಪ್ರ] 1782
|
1784 |
+
[ ][.] -> [ .] 1783
|
1785 |
+
[are][d] -> [ared] 1784
|
1786 |
+
[erg][y] -> [ergy] 1785
|
1787 |
+
[ का][र्य] -> [ कार्य] 1786
|
1788 |
+
[ 1][5] -> [ 15] 1787
|
1789 |
+
[er][ing] -> [ering] 1788
|
1790 |
+
[ pro][ble] -> [ proble] 1789
|
1791 |
+
[ ch][ang] -> [ chang] 1790
|
1792 |
+
[ s][ol] -> [ sol] 1791
|
1793 |
+
[ Wh][at] -> [ What] 1792
|
1794 |
+
[ num][ber] -> [ number] 1793
|
1795 |
+
[ಪ][್ಪ] -> [ಪ್ಪ] 1794
|
1796 |
+
[ a][v] -> [ av] 1795
|
1797 |
+
[ inform][ation] -> [ information] 1796
|
1798 |
+
[ w][om] -> [ wom] 1797
|
1799 |
+
[i][er] -> [ier] 1798
|
1800 |
+
[ क][ार] -> [ कार] 1799
|
1801 |
+
[act][er] -> [acter] 1800
|
1802 |
+
[ cons][id] -> [ consid] 1801
|
1803 |
+
[ T][o] -> [ To] 1802
|
1804 |
+
[ s][chool] -> [ school] 1803
|
1805 |
+
[it][al] -> [ital] 1804
|
1806 |
+
[ ಕಾರ][್ಯ] -> [ ಕಾರ್ಯ] 1805
|
1807 |
+
[ ]['] -> [ '] 1806
|
1808 |
+
[o][int] -> [oint] 1807
|
1809 |
+
[ರ್�][�] -> [ರ್ಶ] 1808
|
1810 |
+
[ ed][uc] -> [ educ] 1809
|
1811 |
+
[og][raph] -> [ograph] 1810
|
1812 |
+
[ d][ire] -> [ dire] 1811
|
1813 |
+
[े][त्र] -> [ेत्र] 1812
|
1814 |
+
[ ಭ][ಾಗ] -> [ ಭಾಗ] 1813
|
1815 |
+
[ಕ][ಾರ] -> [ಕಾರ] 1814
|
1816 |
+
[ C][on] -> [ Con] 1815
|
1817 |
+
[ A][nd] -> [ And] 1816
|
1818 |
+
[c][om] -> [com] 1817
|
1819 |
+
[ 19][4] -> [ 194] 1818
|
1820 |
+
[ le][ad] -> [ lead] 1819
|
1821 |
+
[�][�] -> [ಎ] 1820
|
1822 |
+
[ಾ�][�] -> [ಾಧ] 1821
|
1823 |
+
[ स्�][�] -> [ स्क] 1822
|
1824 |
+
[in][al] -> [inal] 1823
|
1825 |
+
[ l][ess] -> [ less] 1824
|
1826 |
+
[ pro][t] -> [ prot] 1825
|
1827 |
+
[ o][ld] -> [ old] 1826
|
1828 |
+
[em][ent] -> [ement] 1827
|
1829 |
+
[ v][is] -> [ vis] 1828
|
1830 |
+
[ f][ew] -> [ few] 1829
|
1831 |
+
[ 19][6] -> [ 196] 1830
|
1832 |
+
[ಖ][್ಯ] -> [ಖ್ಯ] 1831
|
1833 |
+
[ृ][त] -> [ृत] 1832
|
1834 |
+
[ a][round] -> [ around] 1833
|
1835 |
+
[ ][ve] -> [ ve] 1834
|
1836 |
+
[ s][er] -> [ ser] 1835
|
1837 |
+
[ಿಗಳ][ು] -> [ಿಗಳು] 1836
|
1838 |
+
[ಂ][ಪ] -> [ಂಪ] 1837
|
1839 |
+
[ पह][ले] -> [ पहले] 1838
|
1840 |
+
[ff][ic] -> [ffic] 1839
|
1841 |
+
[ g][l] -> [ gl] 1840
|
1842 |
+
[ದ][ಿಂದ] -> [ದಿಂದ] 1841
|
1843 |
+
[ೋ][ಪ] -> [ೋಪ] 1842
|
1844 |
+
[ वर्�][�] -> [ वर्ष] 1843
|
1845 |
+
[ pl][ay] -> [ play] 1844
|
1846 |
+
[ ಬ][ಳ] -> [ ಬಳ] 1845
|
1847 |
+
[ಾಂ][ಟ] -> [ಾಂಟ] 1846
|
1848 |
+
[े�][�] -> [ेड] 1847
|
1849 |
+
[ate][ly] -> [ately] 1848
|
1850 |
+
[ pro][f] -> [ prof] 1849
|
1851 |
+
[u][ro] -> [uro] 1850
|
1852 |
+
[ p][ower] -> [ power] 1851
|
1853 |
+
[ श][ुर] -> [ ���ुर] 1852
|
1854 |
+
[ स्थ][ान] -> [ स्थान] 1853
|
1855 |
+
[ೀ][ವ] -> [ೀವ] 1854
|
1856 |
+
[el][s] -> [els] 1855
|
1857 |
+
[ P][h] -> [ Ph] 1856
|
1858 |
+
[ क्ष][ेत्र] -> [ क्षेत्र] 1857
|
1859 |
+
[ स][ं] -> [ सं] 1858
|
1860 |
+
[स][ी] -> [सी] 1859
|
1861 |
+
[ o][cc] -> [ occ] 1860
|
1862 |
+
[ Eth][an] -> [ Ethan] 1861
|
1863 |
+
[ h][ist] -> [ hist] 1862
|
1864 |
+
[ಟ][್] -> [ಟ್] 1863
|
1865 |
+
[ प][ुर] -> [ पुर] 1864
|
1866 |
+
[o][ot] -> [oot] 1865
|
1867 |
+
[i][en] -> [ien] 1866
|
1868 |
+
[her][s] -> [hers] 1867
|
1869 |
+
[u][nd] -> [und] 1868
|
1870 |
+
[ re][port] -> [ report] 1869
|
1871 |
+
[ �][�] -> [ ಖ] 1870
|
1872 |
+
[ೆಯ][ಲ್ಲಿ] -> [ೆಯಲ್ಲಿ] 1871
|
1873 |
+
[ act][iv] -> [ activ] 1872
|
1874 |
+
[ w][rit] -> [ writ] 1873
|
1875 |
+
[ श][ाम] -> [ शाम] 1874
|
1876 |
+
[ul][l] -> [ull] 1875
|
1877 |
+
[ c][a] -> [ ca] 1876
|
1878 |
+
[he][re] -> [here] 1877
|
1879 |
+
[ d][i] -> [ di] 1878
|
1880 |
+
[ೊಳ][್ಳ] -> [ೊಳ್ಳ] 1879
|
1881 |
+
[ver][n] -> [vern] 1880
|
1882 |
+
[r][al] -> [ral] 1881
|
1883 |
+
[as][es] -> [ases] 1882
|
1884 |
+
[1][2] -> [12] 1883
|
1885 |
+
[ere][st] -> [erest] 1884
|
1886 |
+
[ರ್�][�] -> [ರ್ಕ] 1885
|
1887 |
+
[ ಕ][ೆಲ] -> [ ಕೆಲ] 1886
|
1888 |
+
[ ज][ब] -> [ जब] 1887
|
1889 |
+
[स][्ट] -> [स्ट] 1888
|
1890 |
+
[ l][ast] -> [ last] 1889
|
1891 |
+
[ m][us] -> [ mus] 1890
|
1892 |
+
[or][k] -> [ork] 1891
|
1893 |
+
[ं�][�] -> [ंब] 1892
|
1894 |
+
[ se][c] -> [ sec] 1893
|
1895 |
+
[ comm][on] -> [ common] 1894
|
1896 |
+
[pe][nd] -> [pend] 1895
|
1897 |
+
[ be][l] -> [ bel] 1896
|
1898 |
+
[ ಮೊ][ದಲ] -> [ ಮೊದಲ] 1897
|
1899 |
+
[ call][ed] -> [ called] 1898
|
1900 |
+
[ೇ][ಳ] -> [ೇಳ] 1899
|
1901 |
+
[ pres][ent] -> [ present] 1900
|
1902 |
+
[ st][and] -> [ stand] 1901
|
1903 |
+
[b][o] -> [bo] 1902
|
1904 |
+
[ou][th] -> [outh] 1903
|
1905 |
+
[ p][ass] -> [ pass] 1904
|
1906 |
+
[ ][<] -> [ <] 1905
|
1907 |
+
[ v][ir] -> [ vir] 1906
|
1908 |
+
[ A][t] -> [ At] 1907
|
1909 |
+
[ಾರ][ು] -> [ಾರು] 1908
|
1910 |
+
[ಿ�][�] -> [ಿಶ] 1909
|
1911 |
+
[ ಎ][ಂಬ] -> [ ಎಂಬ] 1910
|
1912 |
+
[ सं�][�] -> [ संग] 1911
|
1913 |
+
[ r][is] -> [ ris] 1912
|
1914 |
+
[ know][n] -> [ known] 1913
|
1915 |
+
[ U][S] -> [ US] 1914
|
1916 |
+
[al][e] -> [ale] 1915
|
1917 |
+
[ri][es] -> [ries] 1916
|
1918 |
+
[ c][or] -> [ cor] 1917
|
1919 |
+
[ The][se] -> [ These] 1918
|
1920 |
+
[ ಸ][್ವ] -> [ ಸ್ವ] 1919
|
1921 |
+
[ under][stand] -> [ understand] 1920
|
1922 |
+
[्�][�] -> [्च] 1921
|
1923 |
+
[ bu][ild] -> [ build] 1922
|
1924 |
+
[ ][[] -> [ [] 1923
|
1925 |
+
[ शाम][िल] -> [ शामिल] 1924
|
1926 |
+
[ert][ain] -> [ertain] 1925
|
1927 |
+
[ op][en] -> [ open] 1926
|
1928 |
+
[ ಮೇಲ][ೆ] -> [ ಮೇಲೆ] 1927
|
1929 |
+
[ಷ][್ಟ] -> [ಷ್ಟ] 1928
|
1930 |
+
[ ट][ी] -> [ टी] 1929
|
1931 |
+
[ अन][ु] -> [ अनु] 1930
|
1932 |
+
[en][a] -> [ena] 1931
|
1933 |
+
[ be][g] -> [ beg] 1932
|
1934 |
+
[ಿ�][�] -> [ಿಣ] 1933
|
1935 |
+
[िय][ो] -> [ियो] 1934
|
1936 |
+
[ E][x] -> [ Ex] 1935
|
1937 |
+
[ res][p] -> [ resp] 1936
|
1938 |
+
[ ಅ][ನ] -> [ ಅನ] 1937
|
1939 |
+
[ s][ay] -> [ say] 1938
|
1940 |
+
[ा�][�] -> [ाध] 1939
|
1941 |
+
[ e][qu] -> [ equ] 1940
|
1942 |
+
[ೆ][ಂಟ] -> [ೆಂಟ] 1941
|
1943 |
+
[w][ays] -> [ways] 1942
|
1944 |
+
[ supp][ort] -> [ support] 1943
|
1945 |
+
[et][s] -> [ets] 1944
|
1946 |
+
[ c][our] -> [ cour] 1945
|
1947 |
+
[ d][on] -> [ don] 1946
|
1948 |
+
[ु�][�] -> [ुख] 1947
|
1949 |
+
[is][hed] -> [ished] 1948
|
1950 |
+
[ lar][ge] -> [ large] 1949
|
1951 |
+
[ E][ng] -> [ Eng] 1950
|
1952 |
+
[ 1][6] -> [ 16] 1951
|
1953 |
+
[ ][0] -> [ 0] 1952
|
1954 |
+
[ �][�] -> [ ऑ] 1953
|
1955 |
+
[ w][ant] -> [ want] 1954
|
1956 |
+
[ with][out] -> [ without] 1955
|
1957 |
+
[ p][ain] -> [ pain] 1956
|
1958 |
+
[ h][y] -> [ hy] 1957
|
1959 |
+
[ h][ome] -> [ home] 1958
|
1960 |
+
[ e][as] -> [ eas] 1959
|
1961 |
+
[ire][d] -> [ired] 1960
|
1962 |
+
[್�][�] -> [್ಷ] 1961
|
1963 |
+
[ ಒ][ಳ] -> [ ಒಳ] 1962
|
1964 |
+
[ क][ै] -> [ कै] 1963
|
1965 |
+
[ ನಿರ][್ಮ] -> [ ನಿರ್ಮ] 1964
|
1966 |
+
[ ವರ್�][�] -> [ ವರ್ಷ] 1965
|
1967 |
+
[ರ][ದ] -> [ರದ] 1966
|
1968 |
+
[ 1][4] -> [ 14] 1967
|
1969 |
+
[ा�][�्ट] -> [ाष्ट] 1968
|
1970 |
+
[ाष्ट][्र] -> [ाष्ट्र] 1969
|
1971 |
+
[ा�][�] -> [ाए] 1970
|
1972 |
+
[ ग�][�] -> [ गई] 1971
|
1973 |
+
[ं][त्र] -> [ंत्र] 1972
|
1974 |
+
[ t][em] -> [ tem] 1973
|
1975 |
+
[v][ir] -> [vir] 1974
|
1976 |
+
[ t][reat] -> [ treat] 1975
|
1977 |
+
[ನ][ೇ] -> [ನೇ] 1976
|
1978 |
+
[ೋ][ಲ] -> [ೋಲ] 1977
|
1979 |
+
[ಿಕ][ೊಂಡ] -> [ಿಕೊಂಡ] 1978
|
1980 |
+
[ ख][िल] -> [ खिल] 1979
|
1981 |
+
[ lit][tle] -> [ little] 1980
|
1982 |
+
[ bet][ter] -> [ better] 1981
|
1983 |
+
[ c][are] -> [ care] 1982
|
1984 |
+
[ p][op] -> [ pop] 1983
|
1985 |
+
[in][ing] -> [ining] 1984
|
1986 |
+
[i][el] -> [iel] 1985
|
1987 |
+
[ ಆ][ರ] -> [ ಆರ] 1986
|
1988 |
+
[ि�][�] -> [िग] 1987
|
1989 |
+
[न][ों] -> [नों] 1988
|
1990 |
+
[u][p] -> [up] 1989
|
1991 |
+
[ros][s] -> [ross] 1990
|
1992 |
+
[ಲ್ಲ][ಿರುವ] -> [ಲ್ಲಿರುವ] 1991
|
1993 |
+
[un][k] -> [unk] 1992
|
1994 |
+
[h][old] -> [hold] 1993
|
1995 |
+
[ivers][ity] -> [iversity] 1994
|
1996 |
+
[ im][m] -> [ imm] 1995
|
1997 |
+
[्�][�] -> [्थ] 1996
|
1998 |
+
[p][r] -> [pr] 1997
|
1999 |
+
[ hum][an] -> [ human] 1998
|
2000 |
+
[iss][ion] -> [ission] 1999
|
2001 |
+
[ill][s] -> [ills] 2000
|
2002 |
+
[ with][in] -> [ within] 2001
|
2003 |
+
[ ತ][ಮ್ಮ] -> [ ತಮ್ಮ] 2002
|
2004 |
+
[भ][ी] -> [भी] 2003
|
2005 |
+
[ �][�ा] -> [ या] 2004
|
2006 |
+
[d][ition] -> [dition] 2005
|
2007 |
+
[ाल][य] -> [ालय] 2006
|
2008 |
+
[ l][ight] -> [ light] 2007
|
2009 |
+
[ ए][फ] -> [ एफ] 2008
|
2010 |
+
[ा�][�] -> [ाउ] 2009
|
2011 |
+
[ न][ाम] -> [ नाम] 2010
|
2012 |
+
[iv][ing] -> [iving] 2011
|
2013 |
+
[ int][erest] -> [ interest] 2012
|
2014 |
+
[ಲ][ಾಯಿತು] -> [ಲಾಯಿತು] 2013
|
2015 |
+
[ p][ublic] -> [ public] 2014
|
2016 |
+
[ I][nd] -> [ Ind] 2015
|
2017 |
+
[p][es] -> [pes] 2016
|
2018 |
+
[ C][ol] -> [ Col] 2017
|
2019 |
+
[ per][form] -> [ perform] 2018
|
2020 |
+
[ಕ್�][�] -> [ಕ್ಸ] 2019
|
2021 |
+
[ಾತ][್ರ] -> [ಾತ್ರ] 2020
|
2022 |
+
[ಕ][್ತ] -> [ಕ್ತ] 2021
|
2023 |
+
[ುದ][್ಧ] -> [ುದ್ಧ] 2022
|
2024 |
+
[ ए][स] -> [ एस] 2023
|
2025 |
+
[ p][os] -> [ pos] 2024
|
2026 |
+
[್ಯ][ದ] -> [್ಯದ] 2025
|
2027 |
+
[ent][ial] -> [ential] 2026
|
2028 |
+
[ ಯ][ೋಜ] -> [ ಯೋಜ] 2027
|
2029 |
+
[ा�][�] -> [ाब] 2028
|
2030 |
+
[ क][म] -> [ कम] 2029
|
2031 |
+
[t][ain] -> [tain] 2030
|
2032 |
+
[ H][er] -> [ Her] 2031
|
2033 |
+
[ se][em] -> [ seem] 2032
|
2034 |
+
[ sa][f] -> [ saf] 2033
|
2035 |
+
[ t][urn] -> [ turn] 2034
|
2036 |
+
[ re][al] -> [ real] 2035
|
2037 |
+
[on][y] -> [ony] 2036
|
2038 |
+
[ह][र] -> [हर] 2037
|
2039 |
+
[ he][re] -> [ here] 2038
|
2040 |
+
[ cont][in] -> [ contin] 2039
|
2041 |
+
[ur][y] -> [ury] 2040
|
2042 |
+
[ ][es] -> [ es] 2041
|
2043 |
+
[ m][em] -> [ mem] 2042
|
2044 |
+
[t][he] -> [the] 2043
|
2045 |
+
[ d][ig] -> [ dig] 2044
|
2046 |
+
[it][ive] -> [itive] 2045
|
2047 |
+
[b][s] -> [bs] 2046
|
2048 |
+
[ಿವ][ೆ] -> [ಿವೆ] 2047
|
2049 |
+
[ ಆದ][ರೆ] -> [ ಆದರೆ] 2048
|
2050 |
+
[ �][�] -> [ ओ] 2049
|
2051 |
+
['][t] -> ['t] 2050
|
2052 |
+
[ ][ide] -> [ ide] 2051
|
2053 |
+
[ e][t] -> [ et] 2052
|
2054 |
+
[ t][est] -> [ test] 2053
|
2055 |
+
[ c][ase] -> [ case] 2054
|
2056 |
+
[5][0] -> [50] 2055
|
2057 |
+
[ ಸ][ೇರ] -> [ ಸೇರ] 2056
|
2058 |
+
[ क][्ल] -> [ क्ल] 2057
|
2059 |
+
[�][�] -> [थ] 2058
|
2060 |
+
[ ಆ][ತ] -> [ ಆತ] 2059
|
2061 |
+
[ी][क] -> [ीक] 2060
|
2062 |
+
[ान][ी] -> [ानी] 2061
|
2063 |
+
[ g][row] -> [ grow] 2062
|
2064 |
+
[d][e] -> [de] 2063
|
2065 |
+
[ಿಸ][ುತ್ತದೆ] -> [ಿಸುತ್ತದೆ] 2064
|
2066 |
+
[ ब][्र] -> [ ब्र] 2065
|
2067 |
+
[ Americ][an] -> [ American] 2066
|
2068 |
+
[t][on] -> [ton] 2067
|
2069 |
+
[ ए][ल] -> [ एल] 2068
|
2070 |
+
[as][ter] -> [aster] 2069
|
2071 |
+
[ ap][pe] -> [ appe] 2070
|
2072 |
+
[aj][or] -> [ajor] 2071
|
2073 |
+
[िक][ा] -> [िका] 2072
|
2074 |
+
[ಾನ][್ಯ] -> [ಾನ್ಯ] 2073
|
2075 |
+
[ I][s] -> [ Is] 2074
|
2076 |
+
[ै][ंड] -> [ैंड] 2075
|
2077 |
+
[ಿಯ][ು] -> [ಿಯು] 2076
|
2078 |
+
[ जिस][में] -> [ जिसमें] 2077
|
2079 |
+
[ en][ergy] -> [ energy] 2078
|
2080 |
+
[ ][=] -> [ =] 2079
|
2081 |
+
[ರ್�][�] -> [ರ್ಸ] 2080
|
2082 |
+
[in][ess] -> [iness] 2081
|
2083 |
+
[ w][ar] -> [ war] 2082
|
2084 |
+
[ L][e] -> [ Le] 2083
|
2085 |
+
[a][x] -> [ax] 2084
|
2086 |
+
[ �][�] -> [ ‘] 2085
|
2087 |
+
[ P][l] -> [ Pl] 2086
|
2088 |
+
[ re][du] -> [ redu] 2087
|
2089 |
+
[ �][�] -> [ ಐ] 2088
|
2090 |
+
[ಂ][ಧ] -> [ಂಧ] 2089
|
2091 |
+
[ಿಯ][ಾ] -> [ಿಯಾ] 2090
|
2092 |
+
[ang][u] -> [angu] 2091
|
2093 |
+
[ೈ][ನ್] -> [ೈನ್] 2092
|
2094 |
+
[l][ine] -> [line] 2093
|
2095 |
+
[ S][h] -> [ Sh] 2094
|
2096 |
+
[ com][e] -> [ come] 2095
|
2097 |
+
[ ch][ange] -> [ change] 2096
|
2098 |
+
[as][on] -> [ason] 2097
|
2099 |
+
[ ob][ject] -> [ object] 2098
|
2100 |
+
[ಿಂ][ಗ್] -> [ಿಂಗ್] 2099
|
2101 |
+
[ೈ][ನ] -> [ೈನ] 2100
|
2102 |
+
[ ಸ][ು] -> [ ಸು] 2101
|
2103 |
+
[ಬ][ಹ] -> [ಬಹ] 2102
|
2104 |
+
[्�][�] -> [्ब] 2103
|
2105 |
+
[om][s] -> [oms] 2104
|
2106 |
+
[ म][ह] -> [ मह] 2105
|
2107 |
+
[er][c] -> [erc] 2106
|
2108 |
+
[an][k] -> [ank] 2107
|
2109 |
+
[1][8] -> [18] 2108
|
2110 |
+
[nt][il] -> [ntil] 2109
|
2111 |
+
[os][ed] -> [osed] 2110
|
2112 |
+
[ f][ood] -> [ food] 2111
|
2113 |
+
[ ಕ][ು] -> [ ಕು] 2112
|
2114 |
+
[e][x] -> [ex] 2113
|
2115 |
+
[ ][$] -> [ $] 2114
|
2116 |
+
[b][ers] -> [bers] 2115
|
2117 |
+
[ent][ion] -> [ention] 2116
|
2118 |
+
[ E][uro] -> [ Euro] 2117
|
2119 |
+
[ a][ir] -> [ air] 2118
|
2120 |
+
[ಸ][್ಟ] -> [ಸ್ಟ] 2119
|
2121 |
+
[ಿಕ][ೆ] -> [ಿಕೆ] 2120
|
2122 |
+
[ be][h] -> [ beh] 2121
|
2123 |
+
[क][ों] -> [कों] 2122
|
2124 |
+
[ವಾಗ][ಿದೆ] -> [ವಾಗಿದೆ] 2123
|
2125 |
+
[್ಯ][ಾನ] -> [್ಯಾನ] 2124
|
2126 |
+
[ि�][�] -> [िद] 2125
|
2127 |
+
[o][on] -> [oon] 2126
|
2128 |
+
[ le][t] -> [ let] 2127
|
2129 |
+
[ me][ans] -> [ means] 2128
|
2130 |
+
[l][ess] -> [less] 2129
|
2131 |
+
[ s][ince] -> [ since] 2130
|
2132 |
+
[ आ][य] -> [ आय] 2131
|
2133 |
+
[�][�] -> [ठ] 2132
|
2134 |
+
[�][�] -> [ः] 2133
|
2135 |
+
[ಬ][್ಬ] -> [ಬ್ಬ] 2134
|
2136 |
+
[ स][क] -> [ सक] 2135
|
2137 |
+
[ರ್�][�] -> [ರ್ಮ] 2136
|
2138 |
+
[त][ि] -> [ति] 2137
|
2139 |
+
[ कर][ता] -> [ करता] 2138
|
2140 |
+
[oc][i] -> [oci] 2139
|
2141 |
+
[ ವಿ�][�] -> [ ವಿಶ] 2140
|
2142 |
+
[ॉ][र्�] -> [ॉर्�] 2141
|
2143 |
+
[ n][ame] -> [ name] 2142
|
2144 |
+
[ b][re] -> [ bre] 2143
|
2145 |
+
[ L][ena] -> [ Lena] 2144
|
2146 |
+
[ಿತ][್ರ] -> [ಿತ್ರ] 2145
|
2147 |
+
[ात][्र] -> [ात्र] 2146
|
2148 |
+
[ अ][म] -> [ अम] 2147
|
2149 |
+
[ go][vern] -> [ govern] 2148
|
2150 |
+
[ 1][3] -> [ 13] 2149
|
2151 |
+
[े�][�] -> [ेष] 2150
|
2152 |
+
[i][qu] -> [iqu] 2151
|
2153 |
+
[ b][as] -> [ bas] 2152
|
2154 |
+
[ p][ut] -> [ put] 2153
|
2155 |
+
[ e][arly] -> [ early] 2154
|
2156 |
+
[ me][an] -> [ mean] 2155
|
2157 |
+
[n][ce] -> [nce] 2156
|
2158 |
+
[o][h] -> [oh] 2157
|
2159 |
+
[u][g] -> [ug] 2158
|
2160 |
+
[ं�][�] -> [ंज] 2159
|
2161 |
+
[h][or] -> [hor] 2160
|
2162 |
+
[ ha][pp] -> [ happ] 2161
|
2163 |
+
[iv][en] -> [iven] 2162
|
2164 |
+
[ dire][ct] -> [ direct] 2163
|
2165 |
+
[ ಅ][ಂತ] -> [ ಅಂತ] 2164
|
2166 |
+
[im][e] -> [ime] 2165
|
2167 |
+
[ं�][�] -> [ंध] 2166
|
2168 |
+
[ाड][़] -> [ाड़] 2167
|
2169 |
+
[re][nt] -> [rent] 2168
|
2170 |
+
[ o][per] -> [ oper] 2169
|
2171 |
+
[ go][ing] -> [ going] 2170
|
2172 |
+
[ sh][ow] -> [ show] 2171
|
2173 |
+
[ How][ever] -> [ However] 2172
|
2174 |
+
[ivid][ual] -> [ividual] 2173
|
2175 |
+
[cc][ess] -> [ccess] 2174
|
2176 |
+
[i][od] -> [iod] 2175
|
2177 |
+
[ g][u] -> [ gu] 2176
|
2178 |
+
[ cont][ro] -> [ contro] 2177
|
2179 |
+
[ 1][1] -> [ 11] 2178
|
2180 |
+
[ಪ][ಡ] -> [ಪಡ] 2179
|
2181 |
+
[ st][e] -> [ ste] 2180
|
2182 |
+
[ क][िए] -> [ किए] 2181
|
2183 |
+
[ m][ar] -> [ mar] 2182
|
2184 |
+
[iv][ed] -> [ived] 2183
|
2185 |
+
[ o][il] -> [ oil] 2184
|
2186 |
+
[our][ces] -> [ources] 2185
|
2187 |
+
[ಳ][ು] -> [ಳು] 2186
|
2188 |
+
[ ಉ][ತ್ತ] -> [ ಉತ್ತ] 2187
|
2189 |
+
[ip][s] -> [ips] 2188
|
2190 |
+
[ m][en] -> [ men] 2189
|
2191 |
+
[ N][av] -> [ Nav] 2190
|
2192 |
+
[ p][ur] -> [ pur] 2191
|
2193 |
+
[en][se] -> [ense] 2192
|
2194 |
+
[ ca][us] -> [ caus] 2193
|
2195 |
+
[ ][&] -> [ &] 2194
|
2196 |
+
[if][ied] -> [ified] 2195
|
2197 |
+
[ion][al] -> [ional] 2196
|
2198 |
+
[ u][ntil] -> [ until] 2197
|
2199 |
+
[ an][t] -> [ ant] 2198
|
2200 |
+
[ व][े] -> [ वे] 2199
|
2201 |
+
[ हु�][�] -> [ हुए] 2200
|
2202 |
+
[ar][ing] -> [aring] 2201
|
2203 |
+
[ा�][�] -> [ाण] 2202
|
2204 |
+
[ A][ll] -> [ All] 2203
|
2205 |
+
[id][ed] -> [ided] 2204
|
2206 |
+
[ m][et] -> [ met] 2205
|
2207 |
+
[ ke][ep] -> [ keep] 2206
|
2208 |
+
[ re][st] -> [ rest] 2207
|
2209 |
+
[al][k] -> [alk] 2208
|
2210 |
+
[ up][on] -> [ upon] 2209
|
2211 |
+
[ h][ard] -> [ hard] 2210
|
2212 |
+
[ A][nt] -> [ Ant] 2211
|
2213 |
+
[i][or] -> [ior] 2212
|
2214 |
+
[ m][at] -> [ mat] 2213
|
2215 |
+
[ rese][arch] -> [ research] 2214
|
2216 |
+
[ಾರ][ಂಭ] -> [ಾರಂಭ] 2215
|
2217 |
+
[್ಗಳ][ು] -> [್ಗಳು] 2216
|
2218 |
+
[ b][est] -> [ best] 2217
|
2219 |
+
[c][le] -> [cle] 2218
|
2220 |
+
[ie][ve] -> [ieve] 2219
|
2221 |
+
[ p][ap] -> [ pap] 2220
|
2222 |
+
[ೆ][ಂಬ] -> [ೆಂಬ] 2221
|
2223 |
+
[ೆ�][�] -> [ೆಮ] 2222
|
2224 |
+
[�][�] -> [ಅ] 2223
|
2225 |
+
[p][or] -> [por] 2224
|
2226 |
+
[w][are] -> [ware] 2225
|
2227 |
+
[c][ing] -> [cing] 2226
|
2228 |
+
[ो][म] -> [ोम] 2227
|
2229 |
+
[ bec][ome] -> [ become] 2228
|
2230 |
+
[ se][ver] -> [ sever] 2229
|
2231 |
+
[िय][न] -> [ियन] 2230
|
2232 |
+
[ater][ial] -> [aterial] 2231
|
2233 |
+
[क][्स] -> [क्स] 2232
|
2234 |
+
[ व][ह] -> [ वह] 2233
|
2235 |
+
[ t][re] -> [ tre] 2234
|
2236 |
+
[ e][y] -> [ ey] 2235
|
2237 |
+
[ W][ith] -> [ With] 2236
|
2238 |
+
[ includ][ing] -> [ including] 2237
|
2239 |
+
[ bl][ood] -> [ blood] 2238
|
2240 |
+
[ f][our] -> [ four] 2239
|
2241 |
+
[ಡ][ೆಯ] -> [ಡೆಯ] 2240
|
2242 |
+
[ಂತ][ೆ] -> [ಂತೆ] 2241
|
2243 |
+
[ or][der] -> [ order] 2242
|
2244 |
+
[ ][ident] -> [ ident] 2243
|
2245 |
+
[l][ing] -> [ling] 2244
|
2246 |
+
[ d][em] -> [ dem] 2245
|
2247 |
+
[स][र] -> [सर] 2246
|
2248 |
+
[�][़] -> [�़] 2247
|
2249 |
+
[ p][ar] -> [ par] 2248
|
2250 |
+
[ ल][ेक] -> [ लेक] 2249
|
2251 |
+
[ आ][ई] -> [ आई] 2250
|
2252 |
+
[ ne][ver] -> [ never] 2251
|
2253 |
+
[us][s] -> [uss] 2252
|
2254 |
+
[r][id] -> [rid] 2253
|
2255 |
+
[ೋ][ಟ] -> [ೋಟ] 2254
|
2256 |
+
[ he][ad] -> [ head] 2255
|
2257 |
+
[ो][न] -> [ोन] 2256
|
2258 |
+
[ ग][्र] -> [ ग्र] 2257
|
2259 |
+
[ उप][योग] -> [ उपयोग] 2258
|
2260 |
+
[ T][y] -> [ Ty] 2259
|
2261 |
+
[ect][ion] -> [ection] 2260
|
2262 |
+
[ c][ertain] -> [ certain] 2261
|
2263 |
+
[ R][ail] -> [ Rail] 2262
|
2264 |
+
[ ab][le] -> [ able] 2263
|
2265 |
+
[en][g] -> [eng] 2264
|
2266 |
+
[ ड][ी] -> [ डी] 2265
|
2267 |
+
[ वाल][े] -> [ वाले] 2266
|
2268 |
+
[ sy][m] -> [ sym] 2267
|
2269 |
+
[ ty][pe] -> [ type] 2268
|
2270 |
+
[ gen][es] -> [ genes] 2269
|
2271 |
+
[it][ions] -> [itions] 2270
|
2272 |
+
[ st][ate] -> [ state] 2271
|
2273 |
+
[ produ][ct] -> [ product] 2272
|
2274 |
+
[ m][ajor] -> [ major] 2273
|
2275 |
+
[ ][ess] -> [ ess] 2274
|
2276 |
+
[್�][�] -> [್ಜ] 2275
|
2277 |
+
[ಿ�][�] -> [ಿಜ] 2276
|
2278 |
+
[ th][ink] -> [ think] 2277
|
2279 |
+
[ उ][स] -> [ उस] 2278
|
2280 |
+
[ar][ning] -> [arning] 2279
|
2281 |
+
[ क][ु�] -> [ कु�] 2280
|
2282 |
+
[ e][l] -> [ el] 2281
|
2283 |
+
[ತ][್ರ] -> [ತ್ರ] 2282
|
2284 |
+
[ೀ][ಕ] -> [ೀಕ] 2283
|
2285 |
+
[ज][न] -> [जन] 2284
|
2286 |
+
[ ब][ार] -> [ बार] 2285
|
2287 |
+
[ M][r] -> [ Mr] 2286
|
2288 |
+
[un][e] -> [une] 2287
|
2289 |
+
[ स्क][ूल] -> [ स्कूल] 2288
|
2290 |
+
[ r][un] -> [ run] 2289
|
2291 |
+
[at][ic] -> [atic] 2290
|
2292 |
+
[ s][it] -> [ sit] 2291
|
2293 |
+
[ಿ�][�] -> [ಿಹ] 2292
|
2294 |
+
[ de][p] -> [ dep] 2293
|
2295 |
+
[ro][du] -> [rodu] 2294
|
2296 |
+
[ಕ][ೆ] -> [ಕೆ] 2295
|
2297 |
+
[िय][र] -> [ियर] 2296
|
2298 |
+
[ु][क्त] -> [ुक्त] 2297
|
2299 |
+
[ te][xt] -> [ text] 2298
|
2300 |
+
[ S][o] -> [ So] 2299
|
2301 |
+
[ ind][ividual] -> [ individual] 2300
|
2302 |
+
[ m][ark] -> [ mark] 2301
|
2303 |
+
[v][is] -> [vis] 2302
|
2304 |
+
[ಿದ್ದ][ರು] -> [ಿದ್ದರು] 2303
|
2305 |
+
[ ಕ][ೇ] -> [ ಕೇ] 2304
|
2306 |
+
[ स][ब] -> [ सब] 2305
|
2307 |
+
[ ल][ग] -> [ लग] 2306
|
2308 |
+
[ grou][p] -> [ group] 2307
|
2309 |
+
[ an][al] -> [ anal] 2308
|
2310 |
+
[ ल][ी] -> [ ली] 2309
|
2311 |
+
[े�][�] -> [ेख] 2310
|
2312 |
+
[ord][ing] -> [ording] 2311
|
2313 |
+
[ ret][urn] -> [ return] 2312
|
2314 |
+
[ N][ew] -> [ New] 2313
|
2315 |
+
[ soc][ial] -> [ social] 2314
|
2316 |
+
[oo][ks] -> [ooks] 2315
|
2317 |
+
[ ಕ][ಡ] -> [ ಕಡ] 2316
|
2318 |
+
[ಡೆ][ದ] -> [ಡೆದ] 2317
|
2319 |
+
[ 3][0] -> [ 30] 2318
|
2320 |
+
[ाइ][न] -> [ाइन] 2319
|
2321 |
+
[ l][ow] -> [ low] 2320
|
2322 |
+
[ sub][ject] -> [ subject] 2321
|
2323 |
+
[ द][े] -> [ दे] 2322
|
2324 |
+
[o][ad] -> [oad] 2323
|
2325 |
+
[ ए][म] -> [ एम] 2324
|
2326 |
+
[ ಬ][ೈ] -> [ ಬೈ] 2325
|
2327 |
+
[ develop][ment] -> [ development] 2326
|
2328 |
+
[ic][le] -> [icle] 2327
|
2329 |
+
[ c][ause] -> [ cause] 2328
|
2330 |
+
[if][y] -> [ify] 2329
|
2331 |
+
[ent][al] -> [ental] 2330
|
2332 |
+
[con][om] -> [conom] 2331
|
2333 |
+
[ partic][ular] -> [ particular] 2332
|
2334 |
+
[ sever][al] -> [ several] 2333
|
2335 |
+
[ c][reat] -> [ creat] 2334
|
2336 |
+
[ಗಳ][ಿಗೆ] -> [ಗಳಿಗೆ] 2335
|
2337 |
+
[ಗ][ೊಳ] -> [ಗೊಳ] 2336
|
2338 |
+
[ೊ][ಡ] -> [ೊಡ] 2337
|
2339 |
+
[ೆ][ಳ] -> [ೆಳ] 2338
|
2340 |
+
[ उन्ह][ें] -> [ उन्हें] 2339
|
2341 |
+
[f][ace] -> [face] 2340
|
2342 |
+
[eth][ing] -> [ething] 2341
|
2343 |
+
[iz][ation] -> [ization] 2342
|
2344 |
+
[ pre][vent] -> [ prevent] 2343
|
2345 |
+
[ಲ][ಾಗ] -> [ಲಾಗ] 2344
|
2346 |
+
[ೃ][ತ] -> [ೃತ] 2345
|
2347 |
+
[1][1] -> [11] 2346
|
2348 |
+
[ द][ौर] -> [ दौर] 2347
|
2349 |
+
[ man][ag] -> [ manag] 2348
|
2350 |
+
[ उन][के] -> [ उनके] 2349
|
2351 |
+
[h][ing] -> [hing] 2350
|
2352 |
+
[ a][way] -> [ away] 2351
|
2353 |
+
[ कर][ते] -> [ करते] 2352
|
2354 |
+
[ c][ash] -> [ cash] 2353
|
2355 |
+
[ N][arn] -> [ Narn] 2354
|
2356 |
+
[ includ][e] -> [ include] 2355
|
2357 |
+
[ s][ent] -> [ sent] 2356
|
2358 |
+
[ ಸ್ಥ][ಳ] -> [ ಸ್ಥಳ] 2357
|
2359 |
+
[ ಪ್ರ][ಾರಂಭ] -> [ ಪ್ರಾರಂಭ] 2358
|
2360 |
+
[ ಬ][್ರ] -> [ ಬ್ರ] 2359
|
2361 |
+
[angu][age] -> [anguage] 2360
|
2362 |
+
[ pro][per] -> [ proper] 2361
|
2363 |
+
[ ಪ್ರ][ಮ] -> [ ಪ್ರಮ] 2362
|
2364 |
+
[ 18][3] -> [ 183] 2363
|
2365 |
+
[u][se] -> [use] 2364
|
2366 |
+
[�][�] -> [—] 2365
|
2367 |
+
[ूर्�][��] -> [ूर्व] 2366
|
2368 |
+
[ि�][�] -> [िण] 2367
|
2369 |
+
[at][s] -> [ats] 2368
|
2370 |
+
[ab][ly] -> [ably] 2369
|
2371 |
+
[ other][s] -> [ others] 2370
|
2372 |
+
[ m][ove] -> [ move] 2371
|
2373 |
+
[ber][t] -> [bert] 2372
|
2374 |
+
[ f][ore] -> [ fore] 2373
|
2375 |
+
[ o][pp] -> [ opp] 2374
|
2376 |
+
[ qu][est] -> [ quest] 2375
|
2377 |
+
[ೇ][ಟ] -> [ೇಟ] 2376
|
2378 |
+
[ ಸಮ][ಯ] -> [ ಸಮಯ] 2377
|
2379 |
+
[ th][ings] -> [ things] 2378
|
2380 |
+
[ c][r] -> [ cr] 2379
|
2381 |
+
[vir][on] -> [viron] 2380
|
2382 |
+
[ b][ene] -> [ bene] 2381
|
2383 |
+
[ार्�][�] -> [ार्ट] 2382
|
2384 |
+
[ sec][ond] -> [ second] 2383
|
2385 |
+
[ inv][ol] -> [ invol] 2384
|
2386 |
+
[i][ans] -> [ians] 2385
|
2387 |
+
[ al][ways] -> [ always] 2386
|
2388 |
+
[m][an] -> [man] 2387
|
2389 |
+
[ b][us] -> [ bus] 2388
|
2390 |
+
[ ph][ys] -> [ phys] 2389
|
2391 |
+
[ tim][es] -> [ times] 2390
|
2392 |
+
[ 19][5] -> [ 195] 2391
|
2393 |
+
[ ज][न] -> [ जन] 2392
|
2394 |
+
[�][�़] -> [ढ़] 2393
|
2395 |
+
[ re][pl] -> [ repl] 2394
|
2396 |
+
[is][on] -> [ison] 2395
|
2397 |
+
[ स][ह] -> [ सह] 2396
|
2398 |
+
[y][l] -> [yl] 2397
|
2399 |
+
[ s][l] -> [ sl] 2398
|
2400 |
+
[ count][ry] -> [ country] 2399
|
2401 |
+
[ill][ion] -> [illion] 2400
|
2402 |
+
[ O][r] -> [ Or] 2401
|
2403 |
+
[ Euro][pe] -> [ Europe] 2402
|
2404 |
+
[ gener][al] -> [ general] 2403
|
2405 |
+
[ಿಗಳ][ನ್ನು] -> [ಿಗಳನ್ನು] 2404
|
2406 |
+
[am][es] -> [ames] 2405
|
2407 |
+
[ak][en] -> [aken] 2406
|
2408 |
+
[ro][p] -> [rop] 2407
|
2409 |
+
[ ne][xt] -> [ next] 2408
|
2410 |
+
[b][e] -> [be] 2409
|
2411 |
+
[c][er] -> [cer] 2410
|
2412 |
+
[ ಸ][್ಟ] -> [ ಸ್ಟ] 2411
|
2413 |
+
[ಬಹ][ುದು] -> [ಬಹುದು] 2412
|
2414 |
+
[ द][ू] -> [ दू] 2413
|
2415 |
+
[i][ents] -> [ients] 2414
|
2416 |
+
[ Com][m] -> [ Comm] 2415
|
2417 |
+
[ अ][ल] -> [ अल] 2416
|
2418 |
+
[�][�] -> [अ] 2417
|
2419 |
+
[ प्र][द] -> [ प्रद] 2418
|
2420 |
+
[eth][od] -> [ethod] 2419
|
2421 |
+
[or][th] -> [orth] 2420
|
2422 |
+
[ v][ol] -> [ vol] 2421
|
2423 |
+
[r][ation] -> [ration] 2422
|
2424 |
+
[ p][ri] -> [ pri] 2423
|
2425 |
+
[ ವ][ಿವ] -> [ ವಿವ] 2424
|
2426 |
+
[ೀ][ತ] -> [ೀತ] 2425
|
2427 |
+
[್�][�] -> [್ಹ] 2426
|
2428 |
+
[m][e] -> [me] 2427
|
2429 |
+
[ m][aterial] -> [ material] 2428
|
2430 |
+
[ s][ays] -> [ says] 2429
|
2431 |
+
[ e][le] -> [ ele] 2430
|
2432 |
+
[ कु�][�] -> [ कुछ] 2431
|
2433 |
+
[ ind][ust] -> [ indust] 2432
|
2434 |
+
[ p][ost] -> [ post] 2433
|
2435 |
+
[ಿನ][್] -> [ಿನ್] 2434
|
2436 |
+
[ श][्र] -> [ श्र] 2435
|
2437 |
+
[ le][arn] -> [ learn] 2436
|
2438 |
+
[ ris][k] -> [ risk] 2437
|
2439 |
+
[ै][न] -> [ैन] 2438
|
2440 |
+
[ फ][िल] -> [ फिल] 2439
|
2441 |
+
[v][ious] -> [vious] 2440
|
2442 |
+
[our][ce] -> [ource] 2441
|
2443 |
+
[ard][s] -> [ards] 2442
|
2444 |
+
[ imp][ro] -> [ impro] 2443
|
2445 |
+
[ st][at] -> [ stat] 2444
|
2446 |
+
[ಂತ][್ರ] -> [ಂತ್ರ] 2445
|
2447 |
+
[ರ][ೆಯ] -> [ರೆಯ] 2446
|
2448 |
+
[im][es] -> [imes] 2447
|
2449 |
+
[ लेक][िन] -> [ लेकिन] 2448
|
2450 |
+
[ d][one] -> [ done] 2449
|
2451 |
+
[im][ate] -> [imate] 2450
|
2452 |
+
[i][ver] -> [iver] 2451
|
2453 |
+
[ig][ure] -> [igure] 2452
|
2454 |
+
[ poss][ible] -> [ possible] 2453
|
2455 |
+
[ fam][ily] -> [ family] 2454
|
2456 |
+
[a][pe] -> [ape] 2455
|
2457 |
+
[ ಪೂರ್�][�] -> [ ಪೂರ್ವ] 2456
|
2458 |
+
[ಮ][್] -> [ಮ್] 2457
|
2459 |
+
[ र][ाष्ट्र] -> [ राष्ट्र] 2458
|
2460 |
+
[ु][त] -> [ुत] 2459
|
2461 |
+
[t][o] -> [to] 2460
|
2462 |
+
[ prof][ess] -> [ profess] 2461
|
2463 |
+
[viron][ment] -> [vironment] 2462
|
2464 |
+
[ ][Z] -> [ Z] 2463
|
2465 |
+
[ w][ind] -> [ wind] 2464
|
2466 |
+
[oc][us] -> [ocus] 2465
|
2467 |
+
[ul][ation] -> [ulation] 2466
|
2468 |
+
[ra][p] -> [rap] 2467
|
2469 |
+
[ ಅಧ][್ಯ] -> [ ಅಧ್ಯ] 2468
|
2470 |
+
[ಿ�][�] -> [ಿಧ] 2469
|
2471 |
+
[ ಇ][ತರ] -> [ ಇತರ] 2470
|
2472 |
+
[ಿಯ][ೋ] -> [ಿಯೋ] 2471
|
2473 |
+
[ wor][ds] -> [ words] 2472
|
2474 |
+
[em][ber] -> [ember] 2473
|
2475 |
+
[ O][ne] -> [ One] 2474
|
2476 |
+
[ ಟ][ಿ] -> [ ಟಿ] 2475
|
2477 |
+
[ 19][3] -> [ 193] 2476
|
2478 |
+
[್ಯ][ಾರ] -> [್ಯಾರ] 2477
|
2479 |
+
[ ಪ][ಂದ] -> [ ಪಂದ] 2478
|
2480 |
+
[at][ural] -> [atural] 2479
|
2481 |
+
[ is][s] -> [ iss] 2480
|
2482 |
+
[ C][an] -> [ Can] 2481
|
2483 |
+
[ ಹೊ][ಸ] -> [ ಹೊಸ] 2482
|
2484 |
+
[ा�][�] -> [ाफ] 2483
|
2485 |
+
[ pro][m] -> [ prom] 2484
|
2486 |
+
[ S][ome] -> [ Some] 2485
|
2487 |
+
[2][00] -> [200] 2486
|
2488 |
+
[A][D] -> [AD] 2487
|
2489 |
+
[ पुर][स्क] -> [ पुरस्क] 2488
|
2490 |
+
[ ह][ाल] -> [ हाल] 2489
|
2491 |
+
[ प्र][ाप्त] -> [ प्राप्त] 2490
|
2492 |
+
[t][t] -> [tt] 2491
|
2493 |
+
[ S][p] -> [ Sp] 2492
|
2494 |
+
[ som][ething] -> [ something] 2493
|
2495 |
+
[ A][fter] -> [ After] 2494
|
2496 |
+
[ A][d] -> [ Ad] 2495
|
2497 |
+
[al][ity] -> [ality] 2496
|
2498 |
+
[ spec][ial] -> [ special] 2497
|
2499 |
+
[ per][iod] -> [ period] 2498
|
2500 |
+
[ೇ][ಷ] -> [ೇಷ] 2499
|
2501 |
+
[ h][ouse] -> [ house] 2500
|
2502 |
+
[le][y] -> [ley] 2501
|
2503 |
+
[ to][p] -> [ top] 2502
|
2504 |
+
[ mon][th] -> [ month] 2503
|
2505 |
+
[ cour][se] -> [ course] 2504
|
2506 |
+
[ ಪ್ರದ][ೇಶ] -> [ ಪ್ರದೇಶ] 2505
|
2507 |
+
[ಾ�][�] -> [ಾಶ] 2506
|
2508 |
+
[ follow][ing] -> [ following] 2507
|
2509 |
+
[ स][ाम] -> [ साम] 2508
|
2510 |
+
[ to][get] -> [ toget] 2509
|
2511 |
+
[ toget][her] -> [ together] 2510
|
2512 |
+
[र्�][�] -> [र्स] 2511
|
2513 |
+
[ c][ells] -> [ cells] 2512
|
2514 |
+
[ पुरस्क][ार] -> [ पुरस्कार] 2513
|
2515 |
+
[ क][ई] -> [ कई] 2514
|
2516 |
+
[i][ot] -> [iot] 2515
|
2517 |
+
[ bene][f] -> [ benef] 2516
|
2518 |
+
[ an][im] -> [ anim] 2517
|
2519 |
+
[ hist][ory] -> [ history] 2518
|
2520 |
+
[are][nt] -> [arent] 2519
|
2521 |
+
[ Un][iversity] -> [ University] 2520
|
2522 |
+
[ ಎರಡ][ು] -> [ ಎರಡು] 2521
|
2523 |
+
[ೂ][ಪ] -> [ೂಪ] 2522
|
2524 |
+
[ व][िक] -> [ विक] 2523
|
2525 |
+
[ी][ल] -> [ील] 2524
|
2526 |
+
[ निर्�][�] -> [ निर्म] 2525
|
2527 |
+
[ಜ][ಾಂಟ] -> [ಜಾಂಟ] 2526
|
2528 |
+
[e][g] -> [eg] 2527
|
2529 |
+
[ E][arth] -> [ Earth] 2528
|
2530 |
+
[ dis][e] -> [ dise] 2529
|
2531 |
+
[್ಡ][್] -> [್ಡ್] 2530
|
2532 |
+
[ಾಲ][ಯ] -> [ಾಲಯ] 2531
|
2533 |
+
[ೀ][ನ] -> [ೀನ] 2532
|
2534 |
+
[ ನ][ಿಯ] -> [ ನಿಯ] 2533
|
2535 |
+
[ अम][ेर] -> [ अमेर] 2534
|
2536 |
+
[ l][ine] -> [ line] 2535
|
2537 |
+
[ w][r] -> [ wr] 2536
|
2538 |
+
[l][ish] -> [lish] 2537
|
2539 |
+
[ar][c] -> [arc] 2538
|
2540 |
+
[2][1] -> [21] 2539
|
2541 |
+
[ ಬೈ][ಜಾಂಟ] -> [ ಬೈಜಾಂಟ] 2540
|
2542 |
+
[ �][�] -> [ —] 2541
|
2543 |
+
[ic][ult] -> [icult] 2542
|
2544 |
+
[ R][es] -> [ Res] 2543
|
2545 |
+
[ wh][y] -> [ why] 2544
|
2546 |
+
[ ab][ove] -> [ above] 2545
|
2547 |
+
[ all][ow] -> [ allow] 2546
|
2548 |
+
[ d][ays] -> [ days] 2547
|
2549 |
+
[iel][d] -> [ield] 2548
|
2550 |
+
[ ಮೂಲ][ಕ] -> [ ಮೂಲಕ] 2549
|
2551 |
+
[ pro][ject] -> [ project] 2550
|
2552 |
+
[ am][ong] -> [ among] 2551
|
2553 |
+
[n][er] -> [ner] 2552
|
2554 |
+
[ or][ig] -> [ orig] 2553
|
2555 |
+
[ too][k] -> [ took] 2554
|
2556 |
+
[ n][ight] -> [ night] 2555
|
2557 |
+
[्�][�] -> [्द] 2556
|
2558 |
+
[ क][ह] -> [ कह] 2557
|
2559 |
+
[ ಅದ][ರ] -> [ ಅದರ] 2558
|
2560 |
+
[ s][w] -> [ sw] 2559
|
2561 |
+
[ress][ion] -> [ression] 2560
|
2562 |
+
[ fe][el] -> [ feel] 2561
|
2563 |
+
[ ][Q] -> [ Q] 2562
|
2564 |
+
[ me][as] -> [ meas] 2563
|
2565 |
+
[ to][ol] -> [ tool] 2564
|
2566 |
+
[in][ation] -> [ination] 2565
|
2567 |
+
[o][pe] -> [ope] 2566
|
2568 |
+
[ಫ][್] -> [ಫ್] 2567
|
2569 |
+
[�][�] -> [ಃ] 2568
|
2570 |
+
[ಕ್ರ][ಮ] -> [ಕ್ರಮ] 2569
|
2571 |
+
[ ಸ][್ಪ] -> [ ಸ್ಪ] 2570
|
2572 |
+
[ro][gen] -> [rogen] 2571
|
2573 |
+
[ c][ame] -> [ came] 2572
|
2574 |
+
[ ज][िन] -> [ जिन] 2573
|
2575 |
+
[ Ant][ony] -> [ Antony] 2574
|
2576 |
+
[ le][ft] -> [ left] 2575
|
2577 |
+
[ 2][5] -> [ 25] 2576
|
2578 |
+
[ su][ccess] -> [ success] 2577
|
2579 |
+
[ಸ][್ಯ] -> [ಸ್ಯ] 2578
|
2580 |
+
[ क्ल][ब] -> [ क्लब] 2579
|
2581 |
+
[ eng][ine] -> [ engine] 2580
|
2582 |
+
[ ह][ी] -> [ ही] 2581
|
2583 |
+
[ent][ly] -> [ently] 2582
|
2584 |
+
[i][k] -> [ik] 2583
|
2585 |
+
[her][n] -> [hern] 2584
|
2586 |
+
[ le][arning] -> [ learning] 2585
|
2587 |
+
[ರ್ಶ][ನ] -> [ರ್ಶನ] 2586
|
2588 |
+
[ C][ent] -> [ Cent] 2587
|
2589 |
+
[ d][el] -> [ del] 2588
|
2590 |
+
[ ha][ving] -> [ having] 2589
|
2591 |
+
[v][an] -> [van] 2590
|
2592 |
+
[ in][c] -> [ inc] 2591
|
2593 |
+
[ಿ�][�] -> [ಿಳ] 2592
|
2594 |
+
[ut][ure] -> [uture] 2593
|
2595 |
+
[ pl][ant] -> [ plant] 2594
|
2596 |
+
[c][rib] -> [crib] 2595
|
2597 |
+
[ंब][र] -> [ंबर] 2596
|
2598 |
+
[ो][क] -> [ोक] 2597
|
2599 |
+
[ं�][�] -> [ंख] 2598
|
2600 |
+
[th][ing] -> [thing] 2599
|
2601 |
+
[ spec][ies] -> [ species] 2600
|
2602 |
+
[ Rail][road] -> [ Railroad] 2601
|
2603 |
+
[ f][ar] -> [ far] 2602
|
2604 |
+
[ h][old] -> [ hold] 2603
|
2605 |
+
[ beg][in] -> [ begin] 2604
|
2606 |
+
[ det][erm] -> [ determ] 2605
|
2607 |
+
[ ಅ][ಡ] -> [ ಅಡ] 2606
|
2608 |
+
[ ಪಂದ][್ಯ] -> [ ಪಂದ್ಯ] 2607
|
2609 |
+
[ m][aking] -> [ making] 2608
|
2610 |
+
[ टी][म] -> [ टीम] 2609
|
2611 |
+
[ p][ot] -> [ pot] 2610
|
2612 |
+
[ರ][ಿ] -> [ರಿ] 2611
|
2613 |
+
[ ज][ै] -> [ जै] 2612
|
2614 |
+
[्य][ालय] -> [्यालय] 2613
|
2615 |
+
[ प][ी] -> [ पी] 2614
|
2616 |
+
[ अप][नी] -> [ अपनी] 2615
|
2617 |
+
[ �][�] -> [ ध] 2616
|
2618 |
+
[ am][ount] -> [ amount] 2617
|
2619 |
+
[r][an] -> [ran] 2618
|
2620 |
+
[ aut][hor] -> [ author] 2619
|
2621 |
+
[ treat][ment] -> [ treatment] 2620
|
2622 |
+
[ sc][ient] -> [ scient] 2621
|
2623 |
+
[ಣ][ೆ] -> [ಣೆ] 2622
|
2624 |
+
[ವ][ರೆ] -> [ವರೆ] 2623
|
2625 |
+
[ಾರ][್ಕ] -> [ಾರ್ಕ] 2624
|
2626 |
+
[ सब][से] -> [ सबसे] 2625
|
2627 |
+
[ अन][्य] -> [ अन्य] 2626
|
2628 |
+
[्र][ै] -> [्रै] 2627
|
2629 |
+
[ell][a] -> [ella] 2628
|
2630 |
+
[ f][ocus] -> [ focus] 2629
|
2631 |
+
[pt][ion] -> [ption] 2630
|
2632 |
+
[ again][st] -> [ against] 2631
|
2633 |
+
[ wom][en] -> [ women] 2632
|
2634 |
+
[ಾಷ][್ಟ್ರ] -> [ಾಷ್ಟ್ರ] 2633
|
2635 |
+
[ ಸಂ][ಪ] -> [ ಸಂಪ] 2634
|
2636 |
+
[ि�][�] -> [िध] 2635
|
2637 |
+
[्र][ी] -> [्री] 2636
|
2638 |
+
[�][�] -> [आ] 2637
|
2639 |
+
[s][w] -> [sw] 2638
|
2640 |
+
[ ಬಳ][ಸ] -> [ ಬಳಸ] 2639
|
2641 |
+
[ se][en] -> [ seen] 2640
|
2642 |
+
[म][ान] -> [मान] 2641
|
2643 |
+
[ s][n] -> [ sn] 2642
|
2644 |
+
[ f][in] -> [ fin] 2643
|
2645 |
+
[ al][ong] -> [ along] 2644
|
2646 |
+
[it][her] -> [ither] 2645
|
2647 |
+
[pe][cial] -> [pecial] 2646
|
2648 |
+
[ೇ][ಜ] -> [ೇಜ] 2647
|
2649 |
+
[ ಸಾಮ][ಾನ್ಯ] -> [ ಸಾಮಾನ್ಯ] 2648
|
2650 |
+
[ಲ][್ಪ] -> [ಲ್ಪ] 2649
|
2651 |
+
[ �][�] -> [ ಧ] 2650
|
2652 |
+
[ಿದ್ದ][ಾರೆ] -> [ಿದ್ದಾರೆ] 2651
|
2653 |
+
[ c][ult] -> [ cult] 2652
|
2654 |
+
[ l][ist] -> [ list] 2653
|
2655 |
+
[ರ್�][�] -> [ರ್ಟ] 2654
|
2656 |
+
[ಿಕ][ಾರ] -> [ಿಕಾರ] 2655
|
2657 |
+
[ E][n] -> [ En] 2656
|
2658 |
+
[ic][ro] -> [icro] 2657
|
2659 |
+
[ श][हर] -> [ शहर] 2658
|
2660 |
+
[ उत][्प] -> [ उत्प] 2659
|
2661 |
+
[ d][am] -> [ dam] 2660
|
2662 |
+
[ इस][े] -> [ इसे] 2661
|
2663 |
+
[ ಹೊ][ರ] -> [ ಹೊರ] 2662
|
2664 |
+
[ಲ][ೆ] -> [ಲೆ] 2663
|
2665 |
+
[ಾ�][�] -> [ಾಬ] 2664
|
2666 |
+
[ ज][ारी] -> [ जारी] 2665
|
2667 |
+
[र्�][�] -> [र्ट] 2666
|
2668 |
+
[ p][ract] -> [ pract] 2667
|
2669 |
+
[um][ent] -> [ument] 2668
|
2670 |
+
[ t][ell] -> [ tell] 2669
|
2671 |
+
[ 18][6] -> [ 186] 2670
|
2672 |
+
[र्�][�] -> [र्म] 2671
|
2673 |
+
[ sh][are] -> [ share] 2672
|
2674 |
+
[ m][ethod] -> [ method] 2673
|
2675 |
+
[ chang][es] -> [ changes] 2674
|
2676 |
+
[ t][ry] -> [ try] 2675
|
2677 |
+
[ ass][oci] -> [ associ] 2676
|
2678 |
+
[ �][�] -> [ ಘ] 2677
|
2679 |
+
[ ನಡ][ುವ] -> [ ನಡುವ] 2678
|
2680 |
+
[ prog][ram] -> [ program] 2679
|
2681 |
+
[त][र] -> [तर] 2680
|
2682 |
+
[ add][ition] -> [ addition] 2681
|
2683 |
+
[ c][ut] -> [ cut] 2682
|
2684 |
+
[ G][e] -> [ Ge] 2683
|
2685 |
+
[ र][ख] -> [ रख] 2684
|
2686 |
+
[er][al] -> [eral] 2685
|
2687 |
+
[िय][म] -> [ियम] 2686
|
2688 |
+
[ि�][�] -> [िख] 2687
|
2689 |
+
[ Her][bert] -> [ Herbert] 2688
|
2690 |
+
[om][m] -> [omm] 2689
|
2691 |
+
[a][im] -> [aim] 2690
|
2692 |
+
[ to][day] -> [ today] 2691
|
2693 |
+
[ acc][ess] -> [ access] 2692
|
2694 |
+
[ educ][ation] -> [ education] 2693
|
2695 |
+
[ re][ce] -> [ rece] 2694
|
2696 |
+
[ಿಸ][ಿ] -> [ಿಸಿ] 2695
|
2697 |
+
[ಿಕ][ೆಯ] -> [ಿಕೆಯ] 2696
|
2698 |
+
[ Wh][ile] -> [ While] 2697
|
2699 |
+
[ occ][ur] -> [ occur] 2698
|
2700 |
+
[en][n] -> [enn] 2699
|
2701 |
+
[ we][e] -> [ wee] 2700
|
2702 |
+
[े�][�] -> [ेद] 2701
|
2703 |
+
[ೀವ][ನ] -> [ೀವನ] 2702
|
2704 |
+
[ಿಸಲ][ಾಗ] -> [ಿಸಲಾಗ] 2703
|
2705 |
+
[ಬ][್ಲ] -> [ಬ್ಲ] 2704
|
2706 |
+
[ क][्र] -> [ क्र] 2705
|
2707 |
+
[ sk][ills] -> [ skills] 2706
|
2708 |
+
[ l][anguage] -> [ language] 2707
|
2709 |
+
[ f][ac] -> [ fac] 2708
|
2710 |
+
[i][um] -> [ium] 2709
|
2711 |
+
[ char][acter] -> [ character] 2710
|
2712 |
+
[ म][िल] -> [ मिल] 2711
|
2713 |
+
[िक][्ष] -> [िक्ष] 2712
|
2714 |
+
[p][ar] -> [par] 2713
|
2715 |
+
[th][ough] -> [though] 2714
|
2716 |
+
[ B][y] -> [ By] 2715
|
2717 |
+
[ bec][ame] -> [ became] 2716
|
2718 |
+
[ a][ge] -> [ age] 2717
|
2719 |
+
[ ಒಳ][ಗೊಂಡ] -> [ ಒಳಗೊಂಡ] 2718
|
2720 |
+
[ ಅ][ಭ] -> [ ಅಭ] 2719
|
2721 |
+
[स][्थ] -> [स्थ] 2720
|
2722 |
+
[te][nd] -> [tend] 2721
|
2723 |
+
[o][ver] -> [over] 2722
|
2724 |
+
[ S][y] -> [ Sy] 2723
|
2725 |
+
[ G][erm] -> [ Germ] 2724
|
2726 |
+
[ward][s] -> [wards] 2725
|
2727 |
+
[ir][d] -> [ird] 2726
|
2728 |
+
[ प्र][श] -> [ प्रश] 2727
|
2729 |
+
[ diff][icult] -> [ difficult] 2728
|
2730 |
+
[ g][iven] -> [ given] 2729
|
2731 |
+
[a][z] -> [az] 2730
|
2732 |
+
[is][ion] -> [ision] 2731
|
2733 |
+
[ ಕ][ಾಲ] -> [ ಕಾಲ] 2732
|
2734 |
+
[ತ][ೆ] -> [ತೆ] 2733
|
2735 |
+
[en][cy] -> [ency] 2734
|
2736 |
+
[ಿಂ][ಗ] -> [ಿಂಗ] 2735
|
2737 |
+
[�][�] -> [ष] 2736
|
2738 |
+
[ं][क] -> [ंक] 2737
|
2739 |
+
[ b][i] -> [ bi] 2738
|
2740 |
+
[ Un][ited] -> [ United] 2739
|
2741 |
+
[ comp][ut] -> [ comput] 2740
|
2742 |
+
[e][c] -> [ec] 2741
|
2743 |
+
[v][iew] -> [view] 2742
|
2744 |
+
[ st][ruct] -> [ struct] 2743
|
2745 |
+
[gg][est] -> [ggest] 2744
|
2746 |
+
[ contro][l] -> [ control] 2745
|
2747 |
+
[ I][I] -> [ II] 2746
|
2748 |
+
[ರ್�][�] -> [ರ್ಥ] 2747
|
2749 |
+
[ ex][erc] -> [ exerc] 2748
|
2750 |
+
[g][ar] -> [gar] 2749
|
2751 |
+
[ c][ost] -> [ cost] 2750
|
2752 |
+
[ib][ility] -> [ibility] 2751
|
2753 |
+
[ k][ind] -> [ kind] 2752
|
2754 |
+
[र्�][�] -> [र्श] 2753
|
2755 |
+
[og][n] -> [ogn] 2754
|
2756 |
+
[ de][v] -> [ dev] 2755
|
2757 |
+
[ri][end] -> [riend] 2756
|
2758 |
+
[ Ty][pe] -> [ Type] 2757
|
2759 |
+
[ f][re] -> [ fre] 2758
|
2760 |
+
[क्र][म] -> [क्रम] 2759
|
2761 |
+
[ हो][ने] -> [ होने] 2760
|
2762 |
+
[ t][ells] -> [ tells] 2761
|
2763 |
+
[ cur][rent] -> [ current] 2762
|
2764 |
+
[re][am] -> [ream] 2763
|
2765 |
+
[il][es] -> [iles] 2764
|
2766 |
+
[ B][rit] -> [ Brit] 2765
|
2767 |
+
[ ph][ot] -> [ phot] 2766
|
2768 |
+
[ಿಸ][ಿಕೊಂಡ] -> [ಿಸಿಕೊಂಡ] 2767
|
2769 |
+
[ಿನ][ಲ್ಲಿ] -> [ಿನಲ್ಲಿ] 2768
|
2770 |
+
[ f][uture] -> [ future] 2769
|
2771 |
+
[ p][ress] -> [ press] 2770
|
2772 |
+
[o][id] -> [oid] 2771
|
2773 |
+
[ b][at] -> [ bat] 2772
|
2774 |
+
[led][ge] -> [ledge] 2773
|
2775 |
+
[ खिल][ाड़] -> [ खिलाड़] 2774
|
2776 |
+
[re][g] -> [reg] 2775
|
2777 |
+
[ consid][er] -> [ consider] 2776
|
2778 |
+
[ a][ffect] -> [ affect] 2777
|
2779 |
+
[ sp][ace] -> [ space] 2778
|
2780 |
+
[ inf][l] -> [ infl] 2779
|
2781 |
+
[್ದ][ೇಶ] -> [್ದೇಶ] 2780
|
2782 |
+
[್�][�] -> [್ಣ] 2781
|
2783 |
+
[ು][ಡ] -> [ುಡ] 2782
|
2784 |
+
[ wor][ks] -> [ works] 2783
|
2785 |
+
[)][:] -> [):] 2784
|
2786 |
+
[िक][ी] -> [िकी] 2785
|
2787 |
+
[ort][un] -> [ortun] 2786
|
2788 |
+
[in][c] -> [inc] 2787
|
2789 |
+
[ su][re] -> [ sure] 2788
|
2790 |
+
[ govern][ment] -> [ government] 2789
|
2791 |
+
[ಿಕ][ೊಳ್ಳ] -> [ಿಕೊಳ್ಳ] 2790
|
2792 |
+
[s][ide] -> [side] 2791
|
2793 |
+
[op][s] -> [ops] 2792
|
2794 |
+
[pl][oy] -> [ploy] 2793
|
2795 |
+
[ ಒ][ಪ್ಪ] -> [ ಒಪ್ಪ] 2794
|
2796 |
+
[ f][ire] -> [ fire] 2795
|
2797 |
+
[ उ][द] -> [ उद] 2796
|
2798 |
+
[ prov][ide] -> [ provide] 2797
|
2799 |
+
[ ex][ist] -> [ exist] 2798
|
2800 |
+
[ str][ong] -> [ strong] 2799
|
2801 |
+
[om][es] -> [omes] 2800
|
2802 |
+
[ level][s] -> [ levels] 2801
|
2803 |
+
[ d][om] -> [ dom] 2802
|
2804 |
+
[ W][or] -> [ Wor] 2803
|
2805 |
+
[or][ies] -> [ories] 2804
|
2806 |
+
[ mod][el] -> [ model] 2805
|
2807 |
+
[ are][as] -> [ areas] 2806
|
2808 |
+
[ಾಕ][್] -> [ಾಕ್] 2807
|
2809 |
+
[ ಪ್ರ][ಯ] -> [ ಪ್ರಯ] 2808
|
2810 |
+
[ಿಂ][ತ] -> [ಿಂತ] 2809
|
2811 |
+
[ಾ�][�] -> [ಾಳ] 2810
|
2812 |
+
[ d][r] -> [ dr] 2811
|
2813 |
+
[ಾರ][ರ] -> [ಾರರ] 2812
|
2814 |
+
[ ಸ][ುರ] -> [ ಸುರ] 2813
|
2815 |
+
[1][3] -> [13] 2814
|
2816 |
+
[ सं�][�] -> [ संय] 2815
|
2817 |
+
[ु�][�] -> [ुआ] 2816
|
2818 |
+
[w][h] -> [wh] 2817
|
2819 |
+
[ av][ail] -> [ avail] 2818
|
2820 |
+
[ en][ough] -> [ enough] 2819
|
2821 |
+
[ag][n] -> [agn] 2820
|
2822 |
+
[ s][old] -> [ sold] 2821
|
2823 |
+
[ f][at] -> [ fat] 2822
|
2824 |
+
[ su][ggest] -> [ suggest] 2823
|
2825 |
+
[ ಇದ][ನ್ನು] -> [ ಇದನ್ನು] 2824
|
2826 |
+
[ೋ][ನ] -> [ೋನ] 2825
|
2827 |
+
[ೋ][ಡ] -> [ೋಡ] 2826
|
2828 |
+
[್ಯ][ಾಸ] -> [್ಯಾಸ] 2827
|
2829 |
+
[�][�] -> [इ] 2828
|
2830 |
+
[1][4] -> [14] 2829
|
2831 |
+
[ wor][d] -> [ word] 2830
|
2832 |
+
[ು][ಖ] -> [ುಖ] 2831
|
2833 |
+
[ h][ig] -> [ hig] 2832
|
2834 |
+
[ि�][�] -> [िड] 2833
|
2835 |
+
[ व][ी] -> [ वी] 2834
|
2836 |
+
[ C][le] -> [ Cle] 2835
|
2837 |
+
[ot][s] -> [ots] 2836
|
2838 |
+
[ ne][cess] -> [ necess] 2837
|
2839 |
+
[olog][ical] -> [ological] 2838
|
2840 |
+
[e][ad] -> [ead] 2839
|
2841 |
+
[ though][t] -> [ thought] 2840
|
2842 |
+
[ W][ar] -> [ War] 2841
|
2843 |
+
[ imp][act] -> [ impact] 2842
|
2844 |
+
[ ಪ][ಾಲ] -> [ ಪಾಲ] 2843
|
2845 |
+
[ दौर][ान] -> [ दौरान] 2844
|
2846 |
+
[ें][ट] -> [ेंट] 2845
|
2847 |
+
[र][ो] -> [रो] 2846
|
2848 |
+
[ you][ng] -> [ young] 2847
|
2849 |
+
[ಬ][ಂಧ] -> [ಬಂಧ] 2848
|
2850 |
+
[ fun][ction] -> [ function] 2849
|
2851 |
+
[र्�][�] -> [र्ज] 2850
|
2852 |
+
[ be][low] -> [ below] 2851
|
2853 |
+
[ w][ent] -> [ went] 2852
|
2854 |
+
[ain][ed] -> [ained] 2853
|
2855 |
+
[ inv][est] -> [ invest] 2854
|
2856 |
+
[1][5] -> [15] 2855
|
2857 |
+
[ rep][res] -> [ repres] 2856
|
2858 |
+
[ol][ution] -> [olution] 2857
|
2859 |
+
[ sh][ort] -> [ short] 2858
|
2860 |
+
[sel][ves] -> [selves] 2859
|
2861 |
+
[ re][ason] -> [ reason] 2860
|
2862 |
+
[ ವ][ಿದ] -> [ ವಿದ] 2861
|
2863 |
+
[ ��ुर][ू] -> [ शुरू] 2862
|
2864 |
+
[ j][oint] -> [ joint] 2863
|
2865 |
+
[ C][al] -> [ Cal] 2864
|
2866 |
+
[id][d] -> [idd] 2865
|
2867 |
+
[z][e] -> [ze] 2866
|
2868 |
+
[ರ][ಿಯ] -> [ರಿಯ] 2867
|
2869 |
+
[ how][ever] -> [ however] 2868
|
2870 |
+
[ t][aken] -> [ taken] 2869
|
2871 |
+
[ उत्प][ाद] -> [ उत्पाद] 2870
|
2872 |
+
[ sur][face] -> [ surface] 2871
|
2873 |
+
[ ne][g] -> [ neg] 2872
|
2874 |
+
[ l][ive] -> [ live] 2873
|
2875 |
+
[ ne][ar] -> [ near] 2874
|
2876 |
+
[ d][ra] -> [ dra] 2875
|
2877 |
+
[l][ished] -> [lished] 2876
|
2878 |
+
[ ra][d] -> [ rad] 2877
|
2879 |
+
[os][p] -> [osp] 2878
|
2880 |
+
[il][ar] -> [ilar] 2879
|
2881 |
+
[ ಹ][ೆಸ] -> [ ಹೆಸ] 2880
|
2882 |
+
[ प][ूर्व] -> [ पूर्व] 2881
|
2883 |
+
[ विश][्व] -> [ विश्व] 2882
|
2884 |
+
[2][2] -> [22] 2883
|
2885 |
+
[ c][ir] -> [ cir] 2884
|
2886 |
+
[ per][cent] -> [ percent] 2885
|
2887 |
+
[ b][ra] -> [ bra] 2886
|
2888 |
+
[र्�][�] -> [र्त] 2887
|
2889 |
+
[्य][म] -> [्यम] 2888
|
2890 |
+
[ आ][व] -> [ आव] 2889
|
2891 |
+
[्�][�] -> [्ड] 2890
|
2892 |
+
[ॉ][न] -> [ॉन] 2891
|
2893 |
+
[em][m] -> [emm] 2892
|
2894 |
+
[ं][स] -> [ंस] 2893
|
2895 |
+
[id][ence] -> [idence] 2894
|
2896 |
+
[um][e] -> [ume] 2895
|
2897 |
+
[ incre][ase] -> [ increase] 2896
|
2898 |
+
[is][ing] -> [ising] 2897
|
2899 |
+
[ ar][m] -> [ arm] 2898
|
2900 |
+
[ St][ates] -> [ States] 2899
|
2901 |
+
[ಲ][ವ] -> [ಲವ] 2900
|
2902 |
+
[್�][�] -> [್ಫ] 2901
|
2903 |
+
[ಿಮ][ೆ] -> [ಿಮೆ] 2902
|
2904 |
+
[ೋ][ಸ] -> [ೋಸ] 2903
|
2905 |
+
[र्�][�] -> [र्थ] 2904
|
2906 |
+
[on][es] -> [ones] 2905
|
2907 |
+
[ ಸ][ರಣ] -> [ ಸರಣ] 2906
|
2908 |
+
[ಮ][ಾನ] -> [ಮಾನ] 2907
|
2909 |
+
[ि�][�] -> [िह] 2908
|
2910 |
+
[ f][oot] -> [ foot] 2909
|
2911 |
+
[ ][ill] -> [ ill] 2910
|
2912 |
+
[m][ost] -> [most] 2911
|
2913 |
+
[ಗ][್ರ] -> [ಗ್ರ] 2912
|
2914 |
+
[ whe][ther] -> [ whether] 2913
|
2915 |
+
[ ಡ][ಿ] -> [ ಡಿ] 2914
|
2916 |
+
[ದ][್ಧ] -> [ದ್ಧ] 2915
|
2917 |
+
[ सम][य] -> [ समय] 2916
|
2918 |
+
[र्�][�] -> [र्ष] 2917
|
2919 |
+
[ m][ach] -> [ mach] 2918
|
2920 |
+
[ like][ly] -> [ likely] 2919
|
2921 |
+
[ Com][p] -> [ Comp] 2920
|
2922 |
+
[ೇ][ಖ] -> [ೇಖ] 2921
|
2923 |
+
[ग][ा] -> [गा] 2922
|
2924 |
+
[ंद][्र] -> [ंद्र] 2923
|
2925 |
+
[ en][vironment] -> [ environment] 2924
|
2926 |
+
[g][ed] -> [ged] 2925
|
2927 |
+
[ हु�][�] -> [ हुआ] 2926
|
2928 |
+
[ Narn][ia] -> [ Narnia] 2927
|
2929 |
+
[ de][ath] -> [ death] 2928
|
2930 |
+
[ M][ed] -> [ Med] 2929
|
2931 |
+
[ ಎ][ಂ] -> [ ಎಂ] 2930
|
2932 |
+
[ 200][8] -> [ 2008] 2931
|
2933 |
+
[ರ್�][�] -> [ರ್ಡ] 2932
|
2934 |
+
[ T][e] -> [ Te] 2933
|
2935 |
+
[il][ities] -> [ilities] 2934
|
2936 |
+
[oo][king] -> [ooking] 2935
|
2937 |
+
[ k][ids] -> [ kids] 2936
|
2938 |
+
[ s][ound] -> [ sound] 2937
|
2939 |
+
[ sa][w] -> [ saw] 2938
|
2940 |
+
[ l][ater] -> [ later] 2939
|
2941 |
+
[ d][est] -> [ dest] 2940
|
2942 |
+
[ system][s] -> [ systems] 2941
|
2943 |
+
[il][t] -> [ilt] 2942
|
2944 |
+
[is][ed] -> [ised] 2943
|
2945 |
+
[ bus][iness] -> [ business] 2944
|
2946 |
+
[ ಪ್ರ][ವ] -> [ ಪ್ರವ] 2945
|
2947 |
+
[ ಮು][ಖ್ಯ] -> [ ಮುಖ್ಯ] 2946
|
2948 |
+
[ ರ][ೈ] -> [ ರೈ] 2947
|
2949 |
+
[ ಸಮಯ][ದಲ್ಲಿ] -> [ ಸಮಯದಲ್ಲಿ] 2948
|
2950 |
+
[ h][ands] -> [ hands] 2949
|
2951 |
+
[ ಹ][ೇಳ] -> [ ಹೇಳ] 2950
|
2952 |
+
[ pap][er] -> [ paper] 2951
|
2953 |
+
[ಿಸ][್] -> [ಿಸ್] 2952
|
2954 |
+
[ l][aw] -> [ law] 2953
|
2955 |
+
[ proble][m] -> [ problem] 2954
|
2956 |
+
[ d][ri] -> [ dri] 2955
|
2957 |
+
[ore][d] -> [ored] 2956
|
2958 |
+
[ v][iew] -> [ view] 2957
|
2959 |
+
[ತ][ಿಹ] -> [ತಿಹ] 2958
|
2960 |
+
[ತಿಹ][ಾಸ] -> [ತಿಹಾಸ] 2959
|
2961 |
+
[ 200][7] -> [ 2007] 2960
|
2962 |
+
[am][p] -> [amp] 2961
|
2963 |
+
[ D][r] -> [ Dr] 2962
|
2964 |
+
[ ವ][ೈ] -> [ ವೈ] 2963
|
2965 |
+
[ cle][an] -> [ clean] 2964
|
2966 |
+
[ic][ation] -> [ication] 2965
|
2967 |
+
[ app][lic] -> [ applic] 2966
|
2968 |
+
[ ][ver] -> [ ver] 2967
|
2969 |
+
[ರ್�][�] -> [ರ್ಚ] 2968
|
2970 |
+
[ Th][at] -> [ That] 2969
|
2971 |
+
[as][ing] -> [asing] 2970
|
2972 |
+
[ ty][pes] -> [ types] 2971
|
2973 |
+
[ cent][ury] -> [ century] 2972
|
2974 |
+
[pecial][ly] -> [pecially] 2973
|
2975 |
+
[ ಆ][ಯ] -> [ ಆಯ] 2974
|
2976 |
+
[ು][ಲ] -> [ುಲ] 2975
|
2977 |
+
[ ಯ][ಾವ] -> [ ಯಾವ] 2976
|
2978 |
+
[et][y] -> [ety] 2977
|
2979 |
+
[ le][ast] -> [ least] 2978
|
2980 |
+
[ ಸಂ][ಬಂಧ] -> [ ಸಂಬಂಧ] 2979
|
2981 |
+
[ s][ing] -> [ sing] 2980
|
2982 |
+
[ v][i] -> [ vi] 2981
|
2983 |
+
[ ra][il] -> [ rail] 2982
|
2984 |
+
[ avail][able] -> [ available] 2983
|
2985 |
+
[I][V] -> [IV] 2984
|
2986 |
+
[ ಸುರ][ಂಗ] -> [ ಸುರಂಗ] 2985
|
2987 |
+
[ w][at] -> [ wat] 2986
|
2988 |
+
[ sign][ific] -> [ signific] 2987
|
2989 |
+
[ w][ays] -> [ ways] 2988
|
2990 |
+
[ ಯ][ು] -> [ ಯು] 2989
|
2991 |
+
[್ಯಾಂ][ಡ್] -> [್ಯಾಂಡ್] 2990
|
2992 |
+
[ su][per] -> [ super] 2991
|
2993 |
+
[ul][es] -> [ules] 2992
|
2994 |
+
[o][ice] -> [oice] 2993
|
2995 |
+
[y][n] -> [yn] 2994
|
2996 |
+
[1][6] -> [16] 2995
|
2997 |
+
[ो][ज] -> [ोज] 2996
|
2998 |
+
[m][ber] -> [mber] 2997
|
2999 |
+
[ru][ction] -> [ruction] 2998
|
3000 |
+
[ cre][ate] -> [ create] 2999
|
3001 |
+
[ us][es] -> [ uses] 3000
|
3002 |
+
[ g][ive] -> [ give] 3001
|
3003 |
+
[ proble][ms] -> [ problems] 3002
|
3004 |
+
[u][ed] -> [ued] 3003
|
3005 |
+
[ದ][ು] -> [ದು] 3004
|
3006 |
+
[ ಕ][್ಲ] -> [ ಕ್ಲ] 3005
|
3007 |
+
[್ವ][ಹ] -> [್ವಹ] 3006
|
3008 |
+
[ ಉತ್ತ][ರ] -> [ ಉತ್ತರ] 3007
|
3009 |
+
[ಾಗಿದ್ದ][ರು] -> [ಾಗಿದ್ದರು] 3008
|
3010 |
+
[्�][�] -> [्ञ] 3009
|
3011 |
+
[ an][sw] -> [ answ] 3010
|
3012 |
+
[ म][ूल] -> [ मूल] 3011
|
3013 |
+
[ ex][pect] -> [ expect] 3012
|
3014 |
+
[ಂ][ಚ] -> [ಂಚ] 3013
|
3015 |
+
[ स्थ][ित] -> [ स्थित] 3014
|
3016 |
+
[an][ces] -> [ances] 3015
|
3017 |
+
[e][b] -> [eb] 3016
|
3018 |
+
[ो][व] -> [ोव] 3017
|
3019 |
+
[ इस][के] -> [ इसके] 3018
|
3020 |
+
[er][y] -> [ery] 3019
|
3021 |
+
[ure][d] -> [ured] 3020
|
3022 |
+
[ M][iss] -> [ Miss] 3021
|
3023 |
+
[al][t] -> [alt] 3022
|
3024 |
+
[ab][ility] -> [ability] 3023
|
3025 |
+
[ off][ic] -> [ offic] 3024
|
3026 |
+
[t][al] -> [tal] 3025
|
3027 |
+
[ dig][est] -> [ digest] 3026
|
3028 |
+
[ l][ay] -> [ lay] 3027
|
3029 |
+
[ ಅ][ಪ] -> [ ಅಪ] 3028
|
3030 |
+
[ ಆ][ಡ] -> [ ಆಡ] 3029
|
3031 |
+
[ೋ][ದ] -> [ೋದ] 3030
|
3032 |
+
[ ವ][ಿರ] -> [ ವಿರ] 3031
|
3033 |
+
[ु�][�] -> [ुट] 3032
|
3034 |
+
[ n][atural] -> [ natural] 3033
|
3035 |
+
[ t][ru] -> [ tru] 3034
|
3036 |
+
[ S][oc] -> [ Soc] 3035
|
3037 |
+
[ ज][िल] -> [ जिल] 3036
|
3038 |
+
[ कर][ना] -> [ करना] 3037
|
3039 |
+
[ M][ay] -> [ May] 3038
|
3040 |
+
[os][is] -> [osis] 3039
|
3041 |
+
[ e][conom] -> [ econom] 3040
|
3042 |
+
[ col][le] -> [ colle] 3041
|
3043 |
+
[ ಸ][ಿ] -> [ ಸಿ] 3042
|
3044 |
+
[ ಕಡ][ಿಮೆ] -> [ ಕಡಿಮೆ] 3043
|
3045 |
+
[ ಮು][ಂದ] -> [ ಮುಂದ] 3044
|
3046 |
+
[ಾನ][ೆ] -> [ಾನೆ] 3045
|
3047 |
+
[ರ][ೂ] -> [ರೂ] 3046
|
3048 |
+
[ब][ॉल] -> [बॉल] 3047
|
3049 |
+
[क्ष][िण] -> [क्षिण] 3048
|
3050 |
+
[ m][akes] -> [ makes] 3049
|
3051 |
+
[at][or] -> [ator] 3050
|
3052 |
+
[ 1][00] -> [ 100] 3051
|
3053 |
+
[ ರ][ಲ್ಲಿ] -> [ ರಲ್ಲಿ] 3052
|
3054 |
+
[ writ][ing] -> [ writing] 3053
|
3055 |
+
[ spec][ific] -> [ specific] 3054
|
3056 |
+
[ap][s] -> [aps] 3055
|
3057 |
+
[ �][�] -> [ ई] 3056
|
3058 |
+
[ G][en] -> [ Gen] 3057
|
3059 |
+
[ap][an] -> [apan] 3058
|
3060 |
+
[w][w] -> [ww] 3059
|
3061 |
+
[ b][ased] -> [ based] 3060
|
3062 |
+
[ exper][ience] -> [ experience] 3061
|
3063 |
+
[ produ][ce] -> [ produce] 3062
|
3064 |
+
[ಾಷ್ಟ್ರ][ೀಯ] -> [ಾಷ್ಟ್ರೀಯ] 3063
|
3065 |
+
[ು][ತ] -> [ುತ] 3064
|
3066 |
+
[ೀ][ಟ] -> [ೀಟ] 3065
|
3067 |
+
[�][�] -> [ಆ] 3066
|
3068 |
+
[ pro][b] -> [ prob] 3067
|
3069 |
+
[ s][ide] -> [ side] 3068
|
3070 |
+
[ ल][ोग] -> [ लोग] 3069
|
3071 |
+
[ t][er] -> [ ter] 3070
|
3072 |
+
[in][a] -> [ina] 3071
|
3073 |
+
[ Americ][a] -> [ America] 3072
|
3074 |
+
[ g][reen] -> [ green] 3073
|
3075 |
+
[ा�][�] -> [ाश] 3074
|
3076 |
+
[े�][�] -> [ेय] 3075
|
3077 |
+
[ P][ip] -> [ Pip] 3076
|
3078 |
+
[ ][er] -> [ er] 3077
|
3079 |
+
[ f][ull] -> [ full] 3078
|
3080 |
+
[ wor][king] -> [ working] 3079
|
3081 |
+
[ soc][iety] -> [ society] 3080
|
3082 |
+
[rib][ut] -> [ribut] 3081
|
3083 |
+
[our][n] -> [ourn] 3082
|
3084 |
+
[ct][or] -> [ctor] 3083
|
3085 |
+
[ov][ed] -> [oved] 3084
|
3086 |
+
[ ನ][ಗ] -> [ ನಗ] 3085
|
3087 |
+
[ಲ್ಲ][ಿನ] -> [ಲ್ಲಿನ] 3086
|
3088 |
+
[ ಆ][ಟ] -> [ ಆಟ] 3087
|
3089 |
+
[ೀ][ಸ] -> [ೀಸ] 3088
|
3090 |
+
[ prot][ect] -> [ protect] 3089
|
3091 |
+
[et][imes] -> [etimes] 3090
|
3092 |
+
[ು][ಗಳನ್ನು] -> [ುಗಳನ್ನು] 3091
|
3093 |
+
[ प][ू] -> [ पू] 3092
|
3094 |
+
[ स][ेव] -> [ सेव] 3093
|
3095 |
+
[ M][ar] -> [ Mar] 3094
|
3096 |
+
[ा�][�] -> [ाट] 3095
|
3097 |
+
[ पर][िव] -> [ परिव] 3096
|
3098 |
+
[ st][ep] -> [ step] 3097
|
3099 |
+
[m][it] -> [mit] 3098
|
3100 |
+
[ro][wn] -> [rown] 3099
|
3101 |
+
[ us][ually] -> [ usually] 3100
|
3102 |
+
[rough][t] -> [rought] 3101
|
3103 |
+
[ on][ce] -> [ once] 3102
|
3104 |
+
[ob][al] -> [obal] 3103
|
3105 |
+
[ ಹೆಚ್ಚ][ು] -> [ ಹೆಚ್ಚು] 3104
|
3106 |
+
[ರ][್ನ] -> [ರ್ನ] 3105
|
3107 |
+
[ ಪ][ಡೆದ] -> [ ಪಡೆದ] 3106
|
3108 |
+
[ವರೆ][ಗೆ] -> [ವರೆಗೆ] 3107
|
3109 |
+
[ activ][ities] -> [ activities] 3108
|
3110 |
+
[ m][ind] -> [ mind] 3109
|
3111 |
+
[ಬ][ೇಕ] -> [ಬೇಕ] 3110
|
3112 |
+
[ 2][4] -> [ 24] 3111
|
3113 |
+
[ comp][any] -> [ company] 3112
|
3114 |
+
[ाल][ी] -> [ाली] 3113
|
3115 |
+
[ म][ुख] -> [ मुख] 3114
|
3116 |
+
[ाद][ी] -> [ादी] 3115
|
3117 |
+
[ स][र] -> [ सर] 3116
|
3118 |
+
[ J][apan] -> [ Japan] 3117
|
3119 |
+
[ur][ther] -> [urther] 3118
|
3120 |
+
[l][in] -> [lin] 3119
|
3121 |
+
[ab][or] -> [abor] 3120
|
3122 |
+
[ ನ][್ಯೂ] -> [ ನ್ಯೂ] 3121
|
3123 |
+
[ू][न] -> [ून] 3122
|
3124 |
+
[ techn][ology] -> [ technology] 3123
|
3125 |
+
[ s][at] -> [ sat] 3124
|
3126 |
+
[ಿ�][�] -> [ಿಚ] 3125
|
3127 |
+
[ु�][�] -> [ुब] 3126
|
3128 |
+
[el][f] -> [elf] 3127
|
3129 |
+
[क][्ट] -> [क्ट] 3128
|
3130 |
+
[ m][ot] -> [ mot] 3129
|
3131 |
+
[ व्य][क्त] -> [ व्यक्त] 3130
|
3132 |
+
[ur][ch] -> [urch] 3131
|
3133 |
+
[ M][AD] -> [ MAD] 3132
|
3134 |
+
[ MAD][S] -> [ MADS] 3133
|
3135 |
+
[ count][ries] -> [ countries] 3134
|
3136 |
+
[I][S] -> [IS] 3135
|
3137 |
+
[ g][re] -> [ gre] 3136
|
3138 |
+
[ ob][s] -> [ obs] 3137
|
3139 |
+
[ab][les] -> [ables] 3138
|
3140 |
+
[ಿ�][�] -> [ಿಬ] 3139
|
3141 |
+
[ಿನ][್ನ] -> [ಿನ್ನ] 3140
|
3142 |
+
[ �][�] -> [ ಓ] 3141
|
3143 |
+
[ te][ac] -> [ teac] 3142
|
3144 |
+
[ de][pend] -> [ depend] 3143
|
3145 |
+
[ rese][arc] -> [ researc] 3144
|
3146 |
+
[ con][c] -> [ conc] 3145
|
3147 |
+
[್ರ][ಾಜ] -> [್ರಾಜ] 3146
|
3148 |
+
[ 19][0] -> [ 190] 3147
|
3149 |
+
[ fl][ag] -> [ flag] 3148
|
3150 |
+
[ h][our] -> [ hour] 3149
|
3151 |
+
[ con][d] -> [ cond] 3150
|
3152 |
+
[u][le] -> [ule] 3151
|
3153 |
+
[ them][selves] -> [ themselves] 3152
|
3154 |
+
[ l][at] -> [ lat] 3153
|
3155 |
+
[ dise][ase] -> [ disease] 3154
|
3156 |
+
[ जिस][े] -> [ जिसे] 3155
|
3157 |
+
[ ವ್ಯ][ವ] -> [ ವ್ಯವ] 3156
|
3158 |
+
[ च][ै] -> [ चै] 3157
|
3159 |
+
[व][ल] -> [वल] 3158
|
3160 |
+
[ ब][ड़] -> [ बड़] 3159
|
3161 |
+
[ive][ly] -> [ively] 3160
|
3162 |
+
[ರ][ಿಂದ] -> [ರಿಂದ] 3161
|
3163 |
+
[ फ][िर] -> [ फिर] 3162
|
3164 |
+
[ sh][ip] -> [ ship] 3163
|
3165 |
+
[ के][ंद्र] -> [ केंद्र] 3164
|
3166 |
+
[ sym][pt] -> [ sympt] 3165
|
3167 |
+
[ co][ol] -> [ cool] 3166
|
3168 |
+
[al][f] -> [alf] 3167
|
3169 |
+
[र्�][�] -> [र्न] 3168
|
3170 |
+
[ क][ाम] -> [ काम] 3169
|
3171 |
+
[ 200][6] -> [ 2006] 3170
|
3172 |
+
[g][ers] -> [gers] 3171
|
3173 |
+
[ him][self] -> [ himself] 3172
|
3174 |
+
[od][e] -> [ode] 3173
|
3175 |
+
[ tem][per] -> [ temper] 3174
|
3176 |
+
[emm][ick] -> [emmick] 3175
|
3177 |
+
[ir][st] -> [irst] 3176
|
3178 |
+
[pl][es] -> [ples] 3177
|
3179 |
+
[ disc][uss] -> [ discuss] 3178
|
3180 |
+
[ cont][ain] -> [ contain] 3179
|
3181 |
+
[ೋ][ಮ] -> [ೋಮ] 3180
|
3182 |
+
[ comple][te] -> [ complete] 3181
|
3183 |
+
[ri][e] -> [rie] 3182
|
3184 |
+
[o][le] -> [ole] 3183
|
3185 |
+
[ ac][ross] -> [ across] 3184
|
3186 |
+
[ com][b] -> [ comb] 3185
|
3187 |
+
[it][or] -> [itor] 3186
|
3188 |
+
[ es][pecially] -> [ especially] 3187
|
3189 |
+
[ commun][ity] -> [ community] 3188
|
3190 |
+
[ಂದ][್ರ] -> [ಂದ್ರ] 3189
|
3191 |
+
[ ಅವರ][ನ್ನು] -> [ ಅವರನ್ನು] 3190
|
3192 |
+
[ಬ್ಲ][್ಯೂ] -> [ಬ್ಲ್ಯೂ] 3191
|
3193 |
+
[ 200][5] -> [ 2005] 3192
|
3194 |
+
[ im][age] -> [ image] 3193
|
3195 |
+
[ द][क्षिण] -> [ दक्षिण] 3194
|
3196 |
+
[ ][z] -> [ z] 3195
|
3197 |
+
[ ಪ್ರಮ][ುಖ] -> [ ಪ್ರಮುಖ] 3196
|
3198 |
+
[ृ][ंख] -> [ृंख] 3197
|
3199 |
+
[ा�][�] -> [ाख] 3198
|
3200 |
+
[ ग][ुल] -> [ गुल] 3199
|
3201 |
+
[ 201][0] -> [ 2010] 3200
|
3202 |
+
[ e][ither] -> [ either] 3201
|
3203 |
+
[ be][gan] -> [ began] 3202
|
3204 |
+
[ N][ational] -> [ National] 3203
|
3205 |
+
[्�][�ा] -> [्या] 3204
|
3206 |
+
[ cle][ar] -> [ clear] 3205
|
3207 |
+
[ E][st] -> [ Est] 3206
|
3208 |
+
[ m][ult] -> [ mult] 3207
|
3209 |
+
[ot][e] -> [ote] 3208
|
3210 |
+
[ys][is] -> [ysis] 3209
|
3211 |
+
[ फिल][्म] -> [ फिल्म] 3210
|
3212 |
+
[ आ][ध] -> [ आध] 3211
|
3213 |
+
[am][ed] -> [amed] 3212
|
3214 |
+
[av][es] -> [aves] 3213
|
3215 |
+
[ो][च] -> [ोच] 3214
|
3216 |
+
[ॉ][स] -> [ॉस] 3215
|
3217 |
+
[al][m] -> [alm] 3216
|
3218 |
+
[ d][ark] -> [ dark] 3217
|
3219 |
+
[ t][akes] -> [ takes] 3218
|
3220 |
+
[ ಸ್ಥ][ಾಪ] -> [ ಸ್ಥಾಪ] 3219
|
3221 |
+
[b][y] -> [by] 3220
|
3222 |
+
[ grou][ps] -> [ groups] 3221
|
3223 |
+
[ pro][ced] -> [ proced] 3222
|
3224 |
+
[ ಪ್ರ][ಸ] -> [ ಪ್ರಸ] 3223
|
3225 |
+
[ುತ್ತ][ವೆ] -> [ುತ್ತವೆ] 3224
|
3226 |
+
[ದ][ಲು] -> [ದಲು] 3225
|
3227 |
+
[ 201][3] -> [ 2013] 3226
|
3228 |
+
[ 201][1] -> [ 2011] 3227
|
3229 |
+
[ ಮಾರ][್ಗ] -> [ ಮಾರ್ಗ] 3228
|
3230 |
+
[ದ][್ದ] -> [ದ್ದ] 3229
|
3231 |
+
[ು][ಂಬ] -> [ುಂಬ] 3230
|
3232 |
+
[ ಬ][ಿಡ] -> [ ಬಿಡ] 3231
|
3233 |
+
[ beh][av] -> [ behav] 3232
|
3234 |
+
[ೆ][ನ್] -> [ೆನ್] 3233
|
3235 |
+
[ re][d] -> [ red] 3234
|
3236 |
+
[ श्र][ृंख] -> [ श्रृंख] 3235
|
3237 |
+
[al][king] -> [alking] 3236
|
3238 |
+
[ ಸಾಮ][್ರಾಜ] -> [ ಸಾಮ್ರಾಜ] 3237
|
3239 |
+
[ संग][ीत] -> [ संगीत] 3238
|
3240 |
+
[ar][l] -> [arl] 3239
|
3241 |
+
[ ಬಲ್ಗೇರ][ಿಯನ್] -> [ ಬಲ್ಗೇರಿಯನ್] 3240
|
3242 |
+
[ W][emmick] -> [ Wemmick] 3241
|
3243 |
+
[s][p] -> [sp] 3242
|
3244 |
+
[ rel][ated] -> [ related] 3243
|
3245 |
+
[ as][k] -> [ ask] 3244
|
3246 |
+
[ su][m] -> [ sum] 3245
|
3247 |
+
[ p][ath] -> [ path] 3246
|
3248 |
+
[r][ing] -> [ring] 3247
|
3249 |
+
[्य][ों] -> [्यों] 3248
|
3250 |
+
[ ಹ][ಲವ] -> [ ಹಲವ] 3249
|
3251 |
+
[ ಹಲವ][ಾರು] -> [ ಹಲವಾರು] 3250
|
3252 |
+
[ ದ][ಕ್ಷ] -> [ ದಕ್ಷ] 3251
|
3253 |
+
[ ಪ][ಿ] -> [ ಪಿ] 3252
|
3254 |
+
[it][ing] -> [iting] 3253
|
3255 |
+
[in][king] -> [inking] 3254
|
3256 |
+
[ं�][�] -> [ंच] 3255
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
tiktoken
|
3 |
+
regex
|
tokenizer.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import regex as re
|
2 |
+
|
3 |
+
def get_stats(ids, freq):
    """Tally every adjacent pair in ``ids`` into ``freq``, in place.

    Args:
        ids: Sequence of token ids to scan.
        freq: Dict mapping ``(left, right)`` pairs to counts. It is updated
            in place, so counts can be accumulated over several calls.

    Returns:
        None. The result is the mutation of ``freq``.
    """
    # Zipping the sequence against itself shifted by one yields each
    # consecutive pair; zip stops at the shorter (shifted) operand.
    for left, right in zip(ids, ids[1:]):
        key = (left, right)
        freq[key] = freq.get(key, 0) + 1
|
6 |
+
|
7 |
+
def merge(ids, pair, idx):
    """Return a copy of ``ids`` with each occurrence of ``pair`` replaced by ``idx``.

    The scan runs left to right and matches do not overlap, so merging
    ``(1, 1)`` in ``[1, 1, 1]`` gives ``[idx, 1]``.

    Args:
        ids: List of token ids.
        pair: Two-item sequence ``(first, second)`` to look for.
        idx: Token id that replaces each matched pair.

    Returns:
        A new list; ``ids`` itself is not modified.
    """
    merged = []
    total = len(ids)
    consumed_by_previous = False
    for pos, current in enumerate(ids):
        if consumed_by_previous:
            # This element was the second half of the pair just merged.
            consumed_by_previous = False
            continue
        if pos + 1 < total and current == pair[0] and ids[pos + 1] == pair[1]:
            merged.append(idx)
            consumed_by_previous = True
        else:
            merged.append(current)
    return merged
|
18 |
+
|
19 |
+
def _build_vocab(merges):
|
20 |
+
vocab = {idx: bytes([idx]) for idx in range(256)}
|
21 |
+
for (p0, p1), idx in merges.items():
|
22 |
+
vocab[idx] = vocab[p0] + vocab[p1]
|
23 |
+
return vocab
|
24 |
+
|
25 |
+
class CustomTokenizer:
    """Byte-level BPE tokenizer that loads its merges from a model file.

    Text is first split into chunks by a fixed regex (apostrophe
    contractions, runs of letters including the Devanagari U+0900-U+097F
    and Kannada U+0C80-U+0CFF ranges, digits, punctuation, whitespace).
    Each chunk is UTF-8 encoded and the loaded merges are applied to its
    bytes.

    Attributes set by ``__init__``:
        merges: dict mapping ``(id, id)`` pairs to the merged token id.
        special_tokens: dict mapping special-token strings to their ids.
        vocab: dict mapping token ids to byte strings (from ``_build_vocab``).
        pattern: the split regex source string.
        regex: the compiled split regex.
    """

    def __init__(self, model_file):
        """Load ``model_file`` and compile the chunk-splitting pattern."""
        self.merges, self.special_tokens, self.vocab = self.load(model_file)
        self.pattern = r"""(?i) 's|'t|'re|'ve|'m|'ll|'d| ?\b[\p{L}\u0900-\u0963|\u0966-\u097F]+\b| ?\b[\p{L}\u0C80-\u0C9E|\u0CA0-\u0CFF]+\b| ?[\p{N}]+| ?[.,!?;:'"-]| ?[\u0964-\u0965]| ?[\u0C9E-\u0C9F]| ?[^\s\p{L}\p{N}\u0900-\u097F\u0C80-\u0CFF]+| \s+(?!\S)| \s+"""
        self.regex = re.compile(self.pattern)

    def load(self, model_file):
        """Parse a ``minbpe v1`` model file.

        The file layout read here is: a version line, a pattern line, the
        number of special tokens, that many ``<token> <id>`` lines, then one
        ``<id1> <id2>`` merge per line. Merges receive consecutive ids
        starting at 256, in file order.

        Args:
            model_file: Path of the model file (read as UTF-8 text).

        Returns:
            Tuple ``(merges, special_tokens, vocab)``.

        Raises:
            ValueError: If the version line is not ``minbpe v1`` or a line
                cannot be parsed.
        """
        merges = {}
        special_tokens = {}
        idx = 256
        with open(model_file, 'r', encoding="utf-8") as f:
            version = f.readline().strip()
            # Explicit raise rather than ``assert``: assertions are removed
            # under ``python -O`` and the check would silently disappear.
            if version != "minbpe v1":
                raise ValueError(
                    f"unsupported model file version {version!r}; expected 'minbpe v1'"
                )
            # The pattern line is consumed but not used: the split pattern
            # is the one hard-coded in __init__.
            f.readline()
            num_special = int(f.readline().strip())
            for _ in range(num_special):
                special, special_idx = f.readline().strip().split()
                special_tokens[special] = int(special_idx)
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on the tuple unpacking below.
                if not line.strip():
                    continue
                idx1, idx2 = map(int, line.split())
                merges[(idx1, idx2)] = idx
                idx += 1
        vocab = _build_vocab(merges)
        return merges, special_tokens, vocab

    def _encode_chunk(self, chunk_bytes: bytes) -> list[int]:
        """Apply the loaded merges to the bytes of one chunk.

        On every pass the adjacent pair with the lowest merge id (the one
        that appears earliest in the model file) is merged, until no
        adjacent pair is a known merge.
        """
        tokens = list(chunk_bytes)
        while len(tokens) >= 2:
            stats = {}
            get_stats(tokens, stats)
            # Pairs that are not merges rank as +inf, so they are only
            # selected when nothing mergeable is left.
            pair = min(stats, key=lambda p: self.merges.get(p, float("inf")))
            if pair not in self.merges:
                break
            tokens = merge(tokens, pair, self.merges[pair])
        return tokens

    def _encode_ordinary(self, text: str) -> list[int]:
        """Encode text with no special-token handling."""
        ids = []
        for chunk in self.regex.findall(text):
            ids.extend(self._encode_chunk(chunk.encode("utf-8")))
        return ids

    def encode(self, text: str, allowed_special="none") -> list[int]:
        """Encode ``text`` into a list of token ids.

        Args:
            text: The string to tokenize.
            allowed_special: ``"all"`` to recognise every loaded special
                token, ``"none"`` to recognise none, or a ``set`` /
                ``frozenset`` of special-token strings to recognise. Any
                other value is treated like ``"none"``.

        Returns:
            Token ids. Recognised special tokens map directly to their ids;
            all other text goes through regex splitting and BPE merging.
        """
        if allowed_special == "all":
            special = self.special_tokens
        elif isinstance(allowed_special, (set, frozenset)):
            special = {
                token: token_id
                for token, token_id in self.special_tokens.items()
                if token in allowed_special
            }
        else:
            special = {}

        if not special:
            return self._encode_ordinary(text)

        # The capturing group makes re.split keep the special tokens in the
        # output list, so they can be told apart from ordinary text.
        special_pattern = "(" + "|".join(re.escape(token) for token in special) + ")"
        ids = []
        for part in re.split(special_pattern, text):
            if part in special:
                ids.append(special[part])
            else:
                ids.extend(self._encode_ordinary(part))
        return ids
|