Dhahlan2000 committed (verified)
Commit bbfdd64
1 Parent(s): d12a493

Update app.py

Files changed (1)
app.py +9 -7
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
 from huggingface_hub import InferenceClient
 import os
 
-access_token = os.environ["TOKEN"]
+# access_token = os.environ["TOKEN"]
 
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -48,9 +48,9 @@ def transliterate_to_sinhala(text):
     return transliterate.process('Velthuis', 'Sinhala', text)
 
 # Load conversation model
-conv_model_name = "google/gemma-2b-it" # Use GPT-2 instead of the gated model
-tokenizer = AutoTokenizer.from_pretrained(conv_model_name, trust_remote_code=True, token = access_token)
-model = AutoModelForCausalLM.from_pretrained(conv_model_name, trust_remote_code=True, token = access_token, torch_dtype=torch.bfloat16).to(device)
+# conv_model_name = "google/gemma-2b-it" # Use GPT-2 instead of the gated model
+# tokenizer = AutoTokenizer.from_pretrained(conv_model_name, trust_remote_code=True, token = access_token)
+# model = AutoModelForCausalLM.from_pretrained(conv_model_name, trust_remote_code=True, token = access_token, torch_dtype=torch.bfloat16).to(device)
 # pipe1 = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(device)
 
 # model = "tiiuae/falcon-7b-instruct"
@@ -70,6 +70,8 @@ model = AutoModelForCausalLM.from_pretrained(conv_model_name, trust_remote_code=
 # client = InferenceClient("google/gemma-2b-it")
 
 def conversation_predict(text):
+    interface = gr.interface.load("microsoft/Phi-3-mini-4k-instruct")
+    return interface.result
     # return client.text_generation(text, return_full_text=False)
     # pipe = pipeline(
     #     "text-generation",
@@ -85,9 +87,9 @@ def conversation_predict(text):
 
     # output = pipe(text, **generation_args)
     # return output[0]['generated_text']
-    input_ids = tokenizer(text, return_tensors="pt")
-    outputs = model.generate(**input_ids)
-    return tokenizer.decode(outputs[0])
+    # input_ids = tokenizer(text, return_tensors="pt")
+    # outputs = model.generate(**input_ids)
+    # return tokenizer.decode(outputs[0])
 
     # outputs = pipe1(text, max_new_tokens=256, temperature=0.7, top_k=50, top_p=0.95)
     # return outputs[0]["generated_text"]
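
The commit stops loading google/gemma-2b-it locally and instead points conversation_predict at microsoft/Phi-3-mini-4k-instruct. A minimal sketch of the same idea using the huggingface_hub.InferenceClient that app.py already imports (the approach mirrors the commented-out client.text_generation call in the file; max_new_tokens is an assumed value, not part of the commit):

# Hypothetical sketch, not the committed code: remote generation with
# the Phi-3 model id used in this commit.
from huggingface_hub import InferenceClient

client = InferenceClient("microsoft/Phi-3-mini-4k-instruct")

def conversation_predict(text: str) -> str:
    # return_full_text=False returns only the newly generated continuation,
    # matching the commented-out gemma call elsewhere in app.py.
    return client.text_generation(text, max_new_tokens=256, return_full_text=False)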