Spaces:
Running
Running
import gradio as gr
from hazm import Chunker, Lemmatizer, Normalizer, POSTagger, word_tokenize
# Shared Hazm components: built once at import time and reused by every
# call to process_text instead of being re-created per request.
normalizer, lemmatizer = Normalizer(), Lemmatizer()
chunker = Chunker(model='resources/chunker.model')
# Model file for the Hazm POS tagger that the 'chunk' operation depends on.
# NOTE(review): assumed to live next to 'resources/chunker.model' -- confirm
# the actual location of the tagger model before deploying.
POS_TAGGER_MODEL = 'resources/postagger.model'

_pos_tagger = None


def _get_pos_tagger():
    """Return the shared POSTagger, loading its model on first use."""
    global _pos_tagger
    if _pos_tagger is None:
        _pos_tagger = POSTagger(model=POS_TAGGER_MODEL)
    return _pos_tagger


def process_text(text, operations):
    """Apply the selected Hazm operations to *text*.

    Args:
        text: The input text to process.
        operations: Collection of operation names to run. Recognised names
            are 'normalize', 'tokenize', 'lemmatize' and 'chunk'; ``None``
            is treated as "no operations".

    Returns:
        A dict with one entry per requested operation, keyed by
        'Normalized Text', 'Tokens', 'Lemmas' and 'Chunks' (the string form
        of the chunk parse). When 'normalize' is requested, the remaining
        operations run on the normalized text.
    """
    operations = operations or ()
    result = {}

    if 'normalize' in operations:
        # Rebind `text` so every later operation sees the normalized form.
        text = normalizer.normalize(text)
        result['Normalized Text'] = text

    # Tokenize at most once; all three token-based operations share the list.
    needs_tokens = any(
        name in operations for name in ('tokenize', 'lemmatize', 'chunk')
    )
    tokens = word_tokenize(text) if needs_tokens else []

    if 'tokenize' in operations:
        result['Tokens'] = tokens

    if 'lemmatize' in operations:
        result['Lemmas'] = [lemmatizer.lemmatize(token) for token in tokens]

    if 'chunk' in operations:
        # The chunker works on (word, POS tag) pairs, so the tokens must be
        # tagged first; passing bare token strings is not a valid input.
        tagged_tokens = _get_pos_tagger().tag(tokens)
        result['Chunks'] = str(chunker.parse(tagged_tokens))

    return result
# Define Gradio interface
operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']

# The `gr.inputs` namespace was deprecated in Gradio 3 and removed in
# Gradio 4, where components are exposed at the top level. Prefer the
# top-level components and fall back to `gr.inputs` on older releases.
_components = gr if hasattr(gr, "Textbox") else gr.inputs

iface = gr.Interface(
    fn=process_text,
    inputs=[
        _components.Textbox(lines=10, label="Input Text"),
        _components.CheckboxGroup(choices=operations, label="Operations"),
    ],
    outputs="json",
    title="Persian Text Processor with Hazm",
    description="Select operations to perform on the input text using Hazm.",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()