Spaces:
Running
Running
Commit
·
3036e92
1
Parent(s):
29dc4f2
fixes
Browse files
- .gitignore +3 -0
- app.py +8 -8
- pages/processor.py +20 -16
.gitignore
CHANGED
|
@@ -127,3 +127,6 @@ dmypy.json
|
|
| 127 |
|
| 128 |
# Pyre type checker
|
| 129 |
.pyre/
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
# Pyre type checker
|
| 129 |
.pyre/
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
.vscode/
|
app.py
CHANGED
|
@@ -11,14 +11,14 @@ st.set_page_config(
|
|
| 11 |
PAGES = {"Home": pages.home, "Arabic Text Preprocessor": pages.processor}
|
| 12 |
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
st.sidebar.title("Navigation")
|
| 17 |
-
selection = st.sidebar.radio("Pages", list(PAGES.keys()))
|
| 18 |
|
| 19 |
-
|
|
|
|
| 20 |
ast.shared.components.write_page(page)
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
| 11 |
PAGES = {"Home": pages.home, "Arabic Text Preprocessor": pages.processor}
|
| 12 |
|
| 13 |
|
| 14 |
+
st.sidebar.title("Navigation")
|
| 15 |
+
selection = st.sidebar.radio("Pages", list(PAGES.keys()))
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
page = PAGES[selection]
|
| 18 |
+
with st.spinner(f"Loading {selection} ..."):
|
| 19 |
ast.shared.components.write_page(page)
|
| 20 |
|
| 21 |
+
st.sidebar.header("Info")
|
| 22 |
+
st.sidebar.write("Made by [Wissam Antoun](https://twitter.com/wissam_antoun)")
|
| 23 |
+
st.sidebar.write("[Models Repo](https://github.com/aub-mind/arabert)")
|
| 24 |
+
st.sidebar.write("Source Code [GitHub](https://github.com/WissamAntoun/Arabic-NLP-app)")
|
pages/processor.py
CHANGED
|
@@ -101,7 +101,7 @@ def _desegmentword(orig_word: str) -> str:
|
|
| 101 |
|
| 102 |
|
| 103 |
def write():
|
| 104 |
-
col1, _ = st.columns(
|
| 105 |
|
| 106 |
with col1:
|
| 107 |
col1.title("Arabic Text Pre-Processor")
|
|
@@ -120,26 +120,30 @@ def write():
|
|
| 120 |
value="ولن نبالغ إذا قلنا: إن 'هاتف' أو 'كمبيوتر المكتب' في زمننا هذا ضروري",
|
| 121 |
)
|
| 122 |
|
| 123 |
-
|
| 124 |
-
model_selector =
|
| 125 |
-
|
| 126 |
-
|
|
|
|
| 127 |
if model_selector == "None":
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
| 136 |
"Remove non-digit repetition", True
|
| 137 |
)
|
| 138 |
-
replace_slash_with_dash =
|
| 139 |
-
map_hindi_numbers_to_arabic =
|
| 140 |
"Map hindi numbers to arabic", None
|
| 141 |
)
|
| 142 |
-
apply_farasa_segmentation =
|
|
|
|
|
|
|
| 143 |
|
| 144 |
run_preprocessor = st.button("Run Pre-Processor")
|
| 145 |
|
|
|
|
| 101 |
|
| 102 |
|
| 103 |
def write():
|
| 104 |
+
_, col1, _ = st.columns(3)
|
| 105 |
|
| 106 |
with col1:
|
| 107 |
col1.title("Arabic Text Pre-Processor")
|
|
|
|
| 120 |
value="ولن نبالغ إذا قلنا: إن 'هاتف' أو 'كمبيوتر المكتب' في زمننا هذا ضروري",
|
| 121 |
)
|
| 122 |
|
| 123 |
+
st.sidebar.title("Model Selector")
|
| 124 |
+
model_selector = st.sidebar.selectbox(
|
| 125 |
+
"""Select None to enable further filters""",
|
| 126 |
+
options=MODELS_to_SELECT,
|
| 127 |
+
)
|
| 128 |
if model_selector == "None":
|
| 129 |
+
keep_emojis = st.sidebar.checkbox("Keep emojis", False)
|
| 130 |
+
remove_html_markup = st.sidebar.checkbox("Remove html markup", True)
|
| 131 |
+
strip_tashkeel = st.sidebar.checkbox("Strip tashkeel", True)
|
| 132 |
+
replace_urls_emails_mentions = st.sidebar.checkbox(
|
| 133 |
+
"Replace urls and emails", True
|
| 134 |
+
)
|
| 135 |
+
strip_tatweel = st.sidebar.checkbox("Strip tatweel", True)
|
| 136 |
+
insert_white_spaces = st.sidebar.checkbox("Insert white spaces", True)
|
| 137 |
+
remove_non_digit_repetition = st.sidebar.checkbox(
|
| 138 |
"Remove non-digit repetition", True
|
| 139 |
)
|
| 140 |
+
replace_slash_with_dash = st.sidebar.checkbox("Replace slash with dash", None)
|
| 141 |
+
map_hindi_numbers_to_arabic = st.sidebar.checkbox(
|
| 142 |
"Map hindi numbers to arabic", None
|
| 143 |
)
|
| 144 |
+
apply_farasa_segmentation = st.sidebar.checkbox(
|
| 145 |
+
"Apply farasa segmentation", None
|
| 146 |
+
)
|
| 147 |
|
| 148 |
run_preprocessor = st.button("Run Pre-Processor")
|
| 149 |
|