Spaces:
Sleeping
Sleeping
clementsan
committed on
Commit
·
aa98840
1
Parent(s):
4ce7fc5
Add ASCII transliteration of unicode text
Browse files
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from langchain_community.llms import HuggingFaceEndpoint
|
|
| 13 |
|
| 14 |
from pathlib import Path
|
| 15 |
import chromadb
|
|
|
|
| 16 |
|
| 17 |
from transformers import AutoTokenizer
|
| 18 |
import transformers
|
|
@@ -188,6 +189,8 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Pr
|
|
| 188 |
# Fix potential issues from naming convention
|
| 189 |
## Remove space
|
| 190 |
collection_name = collection_name.replace(" ","-")
|
|
|
|
|
|
|
| 191 |
## Limit length to 50 characters
|
| 192 |
collection_name = collection_name[:50]
|
| 193 |
## Enforce start and end as alphanumeric character
|
|
|
|
| 13 |
|
| 14 |
from pathlib import Path
|
| 15 |
import chromadb
|
| 16 |
+
from unidecode import unidecode
|
| 17 |
|
| 18 |
from transformers import AutoTokenizer
|
| 19 |
import transformers
|
|
|
|
| 189 |
# Fix potential issues from naming convention
|
| 190 |
## Remove space
|
| 191 |
collection_name = collection_name.replace(" ","-")
|
| 192 |
+
## ASCII transliterations of Unicode text
|
| 193 |
+
collection_name = unidecode(collection_name)
|
| 194 |
## Limit length to 50 characters
|
| 195 |
collection_name = collection_name[:50]
|
| 196 |
## Enforce start and end as alphanumeric character
|