Dhahlan2000 committed · verified
Commit 2ab668c · 1 Parent(s): a63ae46

Update app.py

Files changed (1)
  1. app.py +11 -8
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoMod
 from aksharamukha import transliterate
 import torch
 from huggingface_hub import InferenceClient
+import os
+
+access_token = os.environ["TOKEN"]
 
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -45,9 +48,9 @@ def transliterate_to_sinhala(text):
     return transliterate.process('Velthuis', 'Sinhala', text)
 
 # Load conversation model
-# conv_model_name = "microsoft/Phi-3-mini-4k-instruct" # Use GPT-2 instead of the gated model
-# tokenizer = AutoTokenizer.from_pretrained(conv_model_name, trust_remote_code=True)
-# model = AutoModelForCausalLM.from_pretrained(conv_model_name, trust_remote_code=True).to(device)
+conv_model_name = "google/gemma-2b-it" # Use GPT-2 instead of the gated model
+tokenizer = AutoTokenizer.from_pretrained(conv_model_name, trust_remote_code=True, token = access_token)
+model = AutoModelForCausalLM.from_pretrained(conv_model_name, trust_remote_code=True, token = access_token, torch_dtype=torch.bfloat16).to(device)
 # pipe1 = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(device)
 
 # model = "tiiuae/falcon-7b-instruct"
@@ -64,10 +67,10 @@ def transliterate_to_sinhala(text):
 
 # pipe1 = pipeline("text-generation", model="unsloth/gemma-2b-it")
 
-client = InferenceClient("google/gemma-2b-it")
+# client = InferenceClient("google/gemma-2b-it")
 
 # def conversation_predict(text):
-    return client.text_generation(text, return_full_text=False)
+#     return client.text_generation(text, return_full_text=False)
 # pipe = pipeline(
 # "text-generation",
 # model=model,
@@ -82,9 +85,9 @@ client = InferenceClient("google/gemma-2b-it")
 
     # output = pipe(text, **generation_args)
     # return output[0]['generated_text']
-    # input_ids = tokenizer(text, return_tensors="pt").to(device)
-    # outputs = model.generate(**input_ids)
-    # return tokenizer.decode(outputs[0])
+    input_ids = tokenizer(text, return_tensors="pt").to(device)
+    outputs = model.generate(**input_ids)
+    return tokenizer.decode(outputs[0])
 
     # outputs = pipe1(text, max_new_tokens=256, temperature=0.7, top_k=50, top_p=0.95)
     # return outputs[0]["generated_text"]
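
For reference, a minimal self-contained sketch of the generation path this commit switches to: read the Hugging Face token from the TOKEN environment variable, load the gated google/gemma-2b-it model locally instead of calling the hosted InferenceClient, then tokenize, generate, and decode. The function name conversation_predict, max_new_tokens=256, and skip_special_tokens=True are illustrative assumptions and are not confirmed by the diff.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face access token for the gated gemma model, as in the commit.
access_token = os.environ["TOKEN"]

device = "cuda" if torch.cuda.is_available() else "cpu"

conv_model_name = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(conv_model_name, token=access_token)
# The commit also passes trust_remote_code=True; it is omitted here for brevity.
model = AutoModelForCausalLM.from_pretrained(
    conv_model_name, token=access_token, torch_dtype=torch.bfloat16
).to(device)


def conversation_predict(text):
    # Tokenize, generate on the selected device, and decode back to text.
    # max_new_tokens and skip_special_tokens are illustrative defaults, not from the diff.
    inputs = tokenizer(text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Example usage:
# print(conversation_predict("Hello, how are you?"))

Note that in the new file the line "# def conversation_predict(text):" is still commented out while its body lines are uncommented; if no other enclosing def exists in app.py, the uncommented body would need a wrapper like the one sketched above.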