asdc committed
Commit d86d51d · verified · 1 Parent(s): 24bd711

Update app.py

Files changed (1): app.py +37 -25
app.py CHANGED
@@ -1,14 +1,31 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-import os
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from peft import PeftModel
+import torch
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 
-api_key = os.environ.get("ACESS_TOKEN")
-print(api_key)
-client = InferenceClient("asdc/Mistral-7B-multilingual-temporal-expression-normalization", token=api_key)
+# Set your model and adapter paths
+BASE_MODEL = "mistralai/Mistral-7B-v0.1"
+PEFT_ADAPTER = "asdc/Mistral-7B-multilingual-temporal-expression-normalization"
+
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+base_model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
+
+model = PeftModel.from_pretrained(base_model, PEFT_ADAPTER)
+
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device_map="auto"
+)
 
 
 def respond(
@@ -19,29 +36,24 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
+    prompt = system_message + "\n"
+    for user, assistant in history:
+        if user:
+            prompt += f"User: {user}\n"
+        if assistant:
+            prompt += f"Assistant: {assistant}\n"
+    prompt += f"User: {message}\nAssistant:"
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    outputs = pipe(
+        prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    response = outputs[0]["generated_text"][len(prompt):]
+    yield response
 
 
 """