Update app.py
app.py CHANGED
@@ -15,13 +15,10 @@ model_name = "unsloth/Llama-3.2-1B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Load two instances of the model on CUDA for parallel inference
-
+model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
 
-
-
-
-strategy = CreativeWritingStrategy()
-provider = TransformersProvider(model2, tokenizer, device)
+provider = TransformersProvider(model, tokenizer, device)
+strategy = CreativeWritingStrategy(provider)
 creative_sampler = BacktrackSampler(strategy, provider)
 
 # Helper function to create message array for the chat template
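For context, the simplified setup after this hunk reads as below. This is a minimal sketch, not the full app.py: the imports and the device definition sit outside the diff (the backtrack_sampler import paths follow that library's README), so treat those lines as assumptions; everything else mirrors the hunk above.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from backtrack_sampler import BacktrackSampler, CreativeWritingStrategy
from backtrack_sampler.provider.transformers_provider import TransformersProvider

model_name = "unsloth/Llama-3.2-1B-Instruct"
device = torch.device("cuda")  # assumption: defined above the changed lines

tokenizer = AutoTokenizer.from_pretrained(model_name)

# After this commit a single model instance serves both sampling paths,
# replacing the two parallel instances the comment in the diff refers to.
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

provider = TransformersProvider(model, tokenizer, device)
strategy = CreativeWritingStrategy(provider)
creative_sampler = BacktrackSampler(strategy, provider)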
@@ -52,7 +49,7 @@ def generate_responses(prompt, history):
         return tokenizer.decode(generated_list, skip_special_tokens=True)
 
     custom_output = asyncio.run(custom_sampler_task())
-    standard_output =
+    standard_output = model.generate(inputs, max_length=2048, temperature=1)
     # Decode standard output and remove the prompt from the generated response
     standard_response = tokenizer.decode(standard_output[0][len(inputs[0]):], skip_special_tokens=True)
 
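The second hunk fills in the standard-sampling path next to the custom one. Below is a sketch of how generate_responses plausibly flows, continuing the setup sketch above. The body of custom_sampler_task, the creative_sampler.generate call, and the preparation of inputs are all assumptions (the diff only shows the final decode and the two lines around the added generate call).

import asyncio

def generate_responses(prompt, history):
    # Assumption: the prompt is tokenized somewhere above the changed lines.
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")

    async def custom_sampler_task():
        # Assumed shape of the custom path: collect streamed tokens,
        # then decode them in one pass (the decode line is in the diff).
        generated_list = []
        for token in creative_sampler.generate(prompt):  # signature assumed
            generated_list.append(token)
        return tokenizer.decode(generated_list, skip_special_tokens=True)

    custom_output = asyncio.run(custom_sampler_task())

    # Standard sampling on the same shared model, as added in this commit.
    standard_output = model.generate(inputs, max_length=2048, temperature=1)
    # Drop the prompt tokens so only the newly generated text remains.
    standard_response = tokenizer.decode(
        standard_output[0][len(inputs[0]):], skip_special_tokens=True
    )
    return custom_output, standard_response

Sharing one model between both paths should also roughly halve GPU memory use compared with the earlier two-instance setup.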