dingo / app.py
DataEval's picture
Update app.py
01f1fbb verified
raw
history blame
3.31 kB
import json
import gradio as gr
from dingo.exec import Executor
from dingo.io import InputArgs
def dingo_demo(input_path, data_format, column_content, rule_list, prompt_list, model, key, api_url):
    """Run a local dingo evaluation and return its summary as a JSON string.

    This is the click handler of the Gradio demo, so every outcome is
    reported as text for the output box: missing inputs and runtime failures
    both come back as a ``'<ErrorType>: <message>'`` string rather than being
    raised.

    Args:
        input_path: Dataset path, forwarded to dingo as ``input_path``.
        data_format: Dataset format, forwarded as ``data_format``.
        column_content: Name of the content column, forwarded as
            ``column_content``.
        rule_list: Selected rule names, forwarded as
            ``custom_config.rule_list``.
        prompt_list: Selected prompt names, forwarded as
            ``custom_config.prompt_list``.
        model: Stored under
            ``custom_config.llm_config.detect_text_quality_detail.model``.
        key: Stored next to ``model`` as ``key``.
        api_url: Stored next to ``model`` as ``api_url``.

    Returns:
        The first execution result's ``to_dict()`` rendered with
        ``json.dumps(..., indent=4)``, or an error message string.
    """
    import traceback

    # The three scalar inputs are mandatory; report the first one missing.
    required_fields = (
        ('input_path', input_path),
        ('data_format', data_format),
        ('column_content', column_content),
    )
    for field_name, field_value in required_fields:
        if not field_value:
            return f'ValueError: {field_name} can not be empty, please input.'
    if not rule_list and not prompt_list:
        return 'ValueError: rule_list and prompt_list can not be empty at the same time.'

    input_data = {
        "input_path": input_path,
        "data_format": data_format,
        "column_content": column_content,
        "custom_config": {
            "rule_list": rule_list,
            "prompt_list": prompt_list,
            "llm_config": {
                "detect_text_quality_detail": {
                    "model": model,
                    "key": key,
                    "api_url": api_url,
                },
            },
        },
    }

    # This function is the UI boundary: a failure while building the
    # arguments, running the executor, or reading/serialising the result
    # (including an empty result list) is shown to the user in the same
    # textual form as the validation errors above, and the traceback is kept
    # on stderr for whoever operates the demo.
    try:
        input_args = InputArgs(**input_data)
        executor = Executor.exec_map["local"](input_args)
        result = executor.execute()
        summary = result[0].to_dict()
        return json.dumps(summary, indent=4)
    except Exception as err:
        traceback.print_exc()
        return f'{type(err).__name__}: {err}'
if __name__ == '__main__':
    # Choices offered in the two checkbox groups of the form.
    rule_options = ['RuleAbnormalChar', 'RuleAbnormalHtml', 'RuleContentNull', 'RuleContentShort', 'RuleEnterAndSpace', 'RuleOnlyUrl']
    prompt_options = ['PromptRepeat', 'PromptContentChaos']

    # Read the page header with an explicit encoding so the result does not
    # depend on the platform's default locale.
    with open("header.html", "r", encoding="utf-8") as file:
        header = file.read()

    with gr.Blocks() as demo:
        gr.HTML(header)
        with gr.Row():
            with gr.Column():
                # Input components
                input_path = gr.Textbox(value='chupei/format-jsonl', placeholder="please input huggingface dataset path", label="input_path")
                data_format = gr.Dropdown(["jsonl", "json", "plaintext", "listjson"], label="data_format")
                column_content = gr.Textbox(value="content", placeholder="please input column name of content in dataset", label="column_content")
                rule_list = gr.CheckboxGroup(choices=rule_options, label="rule_list")
                prompt_list = gr.CheckboxGroup(choices=prompt_options, label="prompt_list")
                model = gr.Textbox(placeholder="If want to use llm, please input model, such as: deepseek-chat", label="model")
                # The key is a secret: mask it instead of echoing it on screen.
                key = gr.Textbox(placeholder="If want to use llm, please input key, such as: 123456789012345678901234567890xx", label="key", type="password")
                api_url = gr.Textbox(placeholder="If want to use llm, please input api_url, such as: https://api.deepseek.com/v1", label="api_url")
                with gr.Row():
                    submit_single = gr.Button(value="Submit", interactive=True, variant="primary")
            with gr.Column():
                # Output component
                output = gr.Textbox(label="output")

        # Event listeners are registered inside the Blocks context.
        submit_single.click(
            fn=dingo_demo,
            inputs=[input_path, data_format, column_content, rule_list, prompt_list, model, key, api_url],
            outputs=output
        )

    # Launch the interface
    demo.launch()