XLXW committed on
Commit
eb40ae9
·
verified ·
1 Parent(s): d91e25b

Initialize code

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +49 -0
  3. app.py +105 -0
  4. data/learner_examplar_1.1.json +3 -0
  5. requirements.txt +4 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/learner_examplar_1.1.json filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10

# Install system dependencies (compiler toolchain and native libraries).
RUN apt-get update && apt-get install -y \
    git \
    cmake \
    build-essential \
    zlib1g-dev \
    libaio-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Pin the build tooling and the heavy ML packages before anything else.
RUN pip install -U --no-cache-dir \
    cmake==4.0.3 \
    pybind11==2.13.6 \
    spacy==3.5.0 \
    torch==1.13.1

# Copy only the dependency manifest first so the install layer below is
# rebuilt only when requirements.txt changes.
COPY requirements.txt .

# Install Python dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Pin numpy after the requirements install.
# NOTE(review): presumably this undoes an upgrade pulled in by requirements.txt - confirm.
RUN pip install -U --no-cache-dir \
    numpy==1.24.1

# Download the spaCy model.
RUN python -m spacy download en_core_web_sm

# Install the ffrecord library from its Git repository.
RUN pip install --no-cache-dir git+https://github.com/HFAiLab/ffrecord.git

# Set the working directory.
WORKDIR /app

# Copy the application files (one COPY is enough for the whole build context).
COPY . .

ENV PYTHONPATH=/app
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import tempfile
5
+ import random
6
+ import os
7
+ import json
8
+ from pathlib import Path
9
+
10
+ from cxglearner.parser import Parser
11
+ from cxglearner.config import DefaultConfigs, Config
12
+ from cxglearner.utils import init_logger
13
+ from cxglearner.utils.utils_cxs import convert_slots_to_str
14
+
15
# Logs and the parser cache both live under the system temp directory.
temp_dir = tempfile.gettempdir()
log_dir = Path(temp_dir) / "logs"
log_dir.mkdir(exist_ok=True)
cache_dir = Path(temp_dir) / "cache"

# Build the English configuration, logger and parser shared by all callbacks.
config = Config(DefaultConfigs.eng)
config.experiment.log_path = log_dir / "eng.log"
logger = init_logger(config)
parser = Parser(config=config, version="1.1", logger=logger, cache_dir=cache_dir)

# Sample rows offered by the "Click an example" dataset widget.
examples = [["she should be more polite with the customers."]]
# Upper bound on the number of exemplars shown per construction.
MAX_EXAMPLAR = 10

# Resolve the data file relative to this script rather than the current
# working directory, so the app also starts when launched from elsewhere.
examplar_path = Path(__file__).resolve().parent / "data" / "learner_examplar_1.1.json"
with open(examplar_path, "r", encoding="utf-8") as fp:
    examplars = json.load(fp)

logger.debug(len(examplars))
31
+
32
def fill_input_box(example):
    """Return the first element of the clicked example row.

    Wired as the dataset click handler, whose output is the input textbox.
    """
    selected = example[0]
    return selected
34
+
35
+
36
def parse_text(text):
    """Encode and parse ``text`` and build the values for the result widgets.

    Args:
        text: Sentence taken from the input textbox.

    Returns:
        A 3-tuple in the order expected by the Parse button's outputs:
        the stacked token / UPOS / XPOS rows, the ``gr.Radio`` listing the
        parsed constructions, and a one-column DataFrame of sampled
        exemplars for the first construction (empty when there is no
        construction or no exemplar entry for it).
    """
    if not text:
        return gr.Dataframe(), gr.update(choices=[], value=None), gr.Dataframe()

    encoded = parser.encoder.encode(text, raw=True)
    encoded_elements = np.vstack((
        np.array(encoded["lexical"]),
        np.array(encoded["upos"]["spaCy"]),
        np.array(encoded["xpos"]["spaCy"]),
    ))

    # Keep every construction string next to its display label so the
    # exemplar lookup never has to re-parse the "a | b | c" label (which
    # would break if a construction string itself contained "|").
    cxs_strings = []
    radio_labels = []
    for cxs in parser.parse(text)[0]:
        cxs_str = convert_slots_to_str(parser.cxs_decoder[cxs[0]], parser.encoder, logger)
        cxs_strings.append(cxs_str)
        radio_labels.append("{} | {} | {}-{}".format(cxs[0], cxs_str, cxs[1] + 1, cxs[2]))

    # Nothing was parsed: return an empty radio instead of indexing into an
    # empty list to pick a default value.
    if not radio_labels:
        empty_radio = gr.Radio(label="Constructions", choices=[], interactive=True, value=None)
        return encoded_elements, empty_radio, pd.DataFrame()

    radio_display = gr.Radio(
        label="Constructions", choices=radio_labels, interactive=True, value=radio_labels[0]
    )

    first_cxs = cxs_strings[0].strip()
    if first_cxs not in examplars:
        return encoded_elements, radio_display, pd.DataFrame()

    # Sample without replacement so the same exemplar is not shown twice.
    # NOTE(review): assumes each exemplar entry is a list - confirm against the JSON file.
    pool = examplars[first_cxs]
    exams = random.sample(pool, k=min(MAX_EXAMPLAR, len(pool)))
    return encoded_elements, radio_display, pd.DataFrame(exams, columns=[first_cxs])
61
+
62
+
63
def refresh_examplar(option: str):
    """Return a fresh random sample of exemplars for the selected construction.

    Args:
        option: Selected radio label, formatted as
            ``"<index> | <construction> | <start>-<end>"``. ``None`` or an
            empty string yields an empty result.

    Returns:
        A one-column DataFrame (column name = construction string) holding
        at most ``MAX_EXAMPLAR`` exemplars, or an empty DataFrame when the
        construction has no exemplar entry.
    """
    if not option:
        return pd.DataFrame()

    # Cut off the first and the last "|"-separated field only, so a
    # construction string that itself contains "|" stays intact. The index
    # field is not needed here; the original ran eval() on it, which is
    # unsafe on a UI-supplied string and produced an unused value.
    _, _, remainder = option.partition("|")
    cxs, _, _ = remainder.rpartition("|")
    cxs = cxs.strip()

    if cxs not in examplars:
        return pd.DataFrame()

    # Sample without replacement so the same exemplar is not shown twice.
    # NOTE(review): assumes each exemplar entry is a list - confirm against the JSON file.
    pool = examplars[cxs]
    exams = random.sample(pool, k=min(MAX_EXAMPLAR, len(pool)))
    return pd.DataFrame(exams, columns=[cxs])
72
+
73
+
74
def clear_text():
    """Build the reset values returned to the Clear button's three outputs.

    Returns:
        An empty string, an empty DataFrame and a ``gr.Radio`` with no
        choices, in that order.
    """
    blank_input = ""
    blank_table = pd.DataFrame()
    blank_radio = gr.Radio(label="Constructions", choices=[])
    return blank_input, blank_table, blank_radio
76
+
77
+
78
# NOTE(review): the nesting of the layout containers below is reconstructed
# from a rendering that had lost its indentation - confirm against the
# intended page layout.
with gr.Blocks() as demo:
    # Input area: sentence textbox, clickable example and action buttons.
    with gr.Column():
        gr.Markdown("## CxGLearner Parser")
        with gr.Row():
            input_text = gr.Textbox(label="Input Text", placeholder="Enter a sentence here...")

        with gr.Row():
            dataset = gr.Dataset(components=[input_text],
                                 samples=examples,
                                 label="Click an example")
            clear_button = gr.Button("Clear")
            parser_button = gr.Button("Parse")

    # Encoding table and the list of parsed constructions.
    with gr.Column():
        gr.Markdown("### Results of Encoding and Parsing")
        enc_display = gr.Dataframe()
        cxs_display = gr.Radio(label="Constructions", choices=[])

    # Exemplars of the currently selected construction.
    with gr.Column():
        gr.Markdown("### Examplars")
        cons_display = gr.Dataframe()

    parser_button.click(fn=parse_text, inputs=[input_text], outputs=[enc_display, cxs_display, cons_display])
    clear_button.click(fn=clear_text, inputs=[], outputs=[input_text, enc_display, cxs_display])
    # clear_text only resets three widgets; reset the exemplar table as well
    # so stale exemplars do not remain visible after Clear.
    clear_button.click(fn=lambda: pd.DataFrame(), inputs=[], outputs=[cons_display])
    dataset.click(fn=fill_input_box, inputs=dataset, outputs=input_text)
    cxs_display.select(refresh_examplar, inputs=[cxs_display], outputs=cons_display)

demo.launch()
data/learner_examplar_1.1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e7c22b12c2da2ee5d50067c285448e951189372b5b25724e58321691592463
3
+ size 21753927
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ unidecode
2
+ beautifulsoup4
3
+ cxglearner==1.3.1
4
+ gradio