
```python

# Load a 50%-pruned, quantized StarCoder-1B checkpoint with SparseML and run
# a quick generation sanity check through the regular Hugging Face API.
model_id = "mgoin/starcoderbase-1b-pruned50-quant"

# --- Load model with SparseAutoModel ---
from sparseml.transformers.utils import SparseAutoModel
from transformers import AutoConfig

# NOTE(review): unclear why SparseAutoModel needs the config passed in
# explicitly instead of resolving it from model_id itself — confirm upstream.
config = AutoConfig.from_pretrained(model_id)
model = SparseAutoModel.text_generation_from_pretrained(model_id, config=config)

# --- Apply recipe to model ---
# The recipe.yaml uploaded alongside the model is NOT picked up automatically;
# it must be downloaded and applied to the model via this separate helper.
from sparseml.pytorch.model_load.helpers import apply_recipe_structure_to_model
from huggingface_hub import hf_hub_download
import os

recipe_path = hf_hub_download(repo_id=model_id, filename="recipe.yaml")
apply_recipe_structure_to_model(
    model=model, recipe_path=recipe_path, model_path=os.path.dirname(recipe_path)
)

# --- Regular HF inference ---
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_id)
inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
# Expected output (no-op string literal kept as sample transcript):
"""
def print_hello_world():
    print("Hello World!")


print_hello_world
"""

```