Spaces:
Sleeping
Sleeping
Upload with huggingface_hub
Browse files- app.py +62 -0
- predict.py +47 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
import os
from predict import Obj  # predict.py defines only Obj; avoid wildcard import

from transformers import T5ForConditionalGeneration
from transformers import T5TokenizerFast as T5Tokenizer
import pandas as pd

# Checkpoint id; kept distinct from `model` so the loaded model object
# does not shadow the checkpoint string (the original reused the name).
MODEL_NAME = "svjack/comet-atomic-en"
device = "cpu"
#device = "cuda:0"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device).eval()

# Question prefixes the COMET-ATOMIC T5 checkpoint was trained on; each one is
# prepended to the user's event to select which relation is generated.
NEED_PREFIX = 'What are the necessary preconditions for the next event?'
EFFECT_PREFIX = 'What could happen after the next event?'
INTENT_PREFIX = 'What is the motivation for the next event?'
REACT_PREFIX = 'What are your feelings after the following event?'

obj = Obj(model, tokenizer, device)

# Chinese example events from the zh variant of this Space, kept for reference:
# text0 = "X吃到了一顿大餐。"
# text1 = "X和Y一起搭了个积木。"
text0 = "X had a big meal."
text1 = "X invites Y to a party."

# [event text, do_sample] pairs shown as clickable examples in the UI.
example_sample = [
    [text0, False],
    [text1, False],
]
def demo_func(event, do_sample):
    """Run all four COMET-ATOMIC relation prompts against *event*.

    Parameters
    ----------
    event : str
        The event sentence, e.g. "X had a big meal."
    do_sample : bool
        Forwarded to ``Obj.predict`` to toggle sampling vs. beam search.

    Returns
    -------
    dict
        ``{"Output": [{"PREFIX": ..., "PRED": ...}, ...]}`` — one row per
        relation prefix, suitable for the Gradio "json" output component.
    """
    # The original built a pandas Series/DataFrame and ran a dead
    # `for _ in range(times)` loop with times == 1; a plain list
    # comprehension produces the identical list of dicts.
    prefixes = [NEED_PREFIX, EFFECT_PREFIX, INTENT_PREFIX, REACT_PREFIX]
    rows = [
        {
            "PREFIX": prefix,
            "PRED": obj.predict(
                "{}{}".format(prefix, event), do_sample=do_sample
            )[0],
        }
        for prefix in prefixes
    ]
    return {
        "Output": rows
    }
# Wire demo_func into a simple two-input Gradio app with a JSON output pane.
demo = gr.Interface(
    fn=demo_func,
    inputs=[
        gr.Text(label="Event"),
        gr.Checkbox(label="do sample"),
    ],
    outputs="json",
    # Plain string: the original used an f-string with no placeholders.
    title="English Comet Atomic 🦅 demonstration",
    # example_sample is a non-empty literal, so the original
    # `example_sample if example_sample else None` always picked it.
    examples=example_sample,
    cache_examples=False,
)

# launch() with no arguments is identical to passing
# server_name=None, server_port=None (both are the defaults).
demo.launch()
predict.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Obj:
    """Pair a seq2seq model with its tokenizer for one-call text generation.

    The model is moved to *device* on construction; ``predict`` then handles
    encode → generate → decode for a single source string.
    """

    def __init__(self, model, tokenizer, device = "cpu"):
        # Moving to the device here means predict() never has to.
        self.tokenizer = tokenizer
        self.device = device
        self.model = model.to(device)

    def predict(
        self,
        source_text: str,
        max_length: int = 512,
        num_return_sequences: int = 1,
        num_beams: int = 2,
        top_k: int = 50,
        top_p: float = 0.95,
        do_sample: bool = True,
        repetition_penalty: float = 2.5,
        length_penalty: float = 1.0,
        early_stopping: bool = True,
        skip_special_tokens: bool = True,
        clean_up_tokenization_spaces: bool = True,
    ):
        """Generate continuations of *source_text*.

        All keyword arguments are forwarded to ``model.generate`` (generation
        controls) or ``tokenizer.decode`` (the two ``skip_special_tokens`` /
        ``clean_up_tokenization_spaces`` flags).

        Returns a list of ``num_return_sequences`` decoded strings.
        """
        encoded = self.tokenizer.encode(
            source_text, return_tensors="pt", add_special_tokens=True
        ).to(self.device)
        outputs = self.model.generate(
            input_ids=encoded,
            num_beams=num_beams,
            max_length=max_length,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            early_stopping=early_stopping,
            top_p=top_p,
            top_k=top_k,
            num_return_sequences=num_return_sequences,
            do_sample = do_sample
        )
        decode = self.tokenizer.decode
        return [
            decode(
                sequence,
                skip_special_tokens=skip_special_tokens,
                clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            )
            for sequence in outputs
        ]
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
torch
transformers
pandas