Spaces:
Running
Running
llm refactor
Browse files
- modules/llm/__init__.py +11 -0
- modules/llm/base.py +15 -0
- modules/{llm.py → llm/hf_pipeline.py} +7 -35
- modules/llm/registry.py +19 -0
- tests/__init__.py +0 -0
- tests/test_llm_infer.py +26 -0
modules/llm/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base import AbstractLLMModel
|
| 2 |
+
from .registry import LLM_MODEL_REGISTRY, get_llm_model, register_llm_model
|
| 3 |
+
from .hf_pipeline import HFTextGenerationLLM
|
| 4 |
+
from .qwen import QwenLLM
|
| 5 |
+
|
| 6 |
+
# Public API of the package. The concrete wrapper classes imported above are
# listed too, so the imports are explicit re-exports rather than apparently
# unused names.
__all__ = [
    "AbstractLLMModel",
    "get_llm_model",
    "register_llm_model",
    "LLM_MODEL_REGISTRY",
    "HFTextGenerationLLM",
    "QwenLLM",
]
|
modules/llm/base.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class AbstractLLMModel(ABC):
    """Abstract base for LLM wrappers.

    Stores the load settings on the instance and requires subclasses to
    implement :meth:`generate`.
    """

    def __init__(
        self, model_id: str, device: str = "cpu", cache_dir: str = "cache", **kwargs
    ):
        """Print a loading notice and remember the settings.

        Args:
            model_id: Identifier of the model; stored as ``self.model_id``.
            device: Stored as ``self.device``; defaults to ``"cpu"``.
            cache_dir: Stored as ``self.cache_dir``; defaults to ``"cache"``.
            **kwargs: Accepted but not used by this base initializer.
        """
        print(f"Loading LLM model {model_id}...")
        self.model_id, self.device, self.cache_dir = model_id, device, cache_dir

    @abstractmethod
    def generate(self, prompt: str, **kwargs) -> str:
        """Generate text for ``prompt``; subclasses must override this."""
|
modules/{llm.py → llm/hf_pipeline.py}
RENAMED
|
@@ -1,45 +1,16 @@
|
|
| 1 |
import os
|
| 2 |
-
from abc import ABC, abstractmethod
|
| 3 |
|
| 4 |
from transformers import pipeline
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
|
| 10 |
-
|
| 11 |
-
def __init__(
|
| 12 |
-
self, model_id: str, device: str = "cpu", cache_dir: str = "cache", **kwargs
|
| 13 |
-
):
|
| 14 |
-
print(f"Loading LLM model {model_id}...")
|
| 15 |
-
self.model_id = model_id
|
| 16 |
-
self.device = device
|
| 17 |
-
self.cache_dir = cache_dir
|
| 18 |
-
|
| 19 |
-
@abstractmethod
|
| 20 |
-
def generate(self, prompt: str, **kwargs) -> str:
|
| 21 |
-
pass
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
def register_llm_model(prefix: str):
|
| 25 |
-
def wrapper(cls):
|
| 26 |
-
assert issubclass(cls, AbstractLLMModel), f"{cls} must inherit AbstractLLMModel"
|
| 27 |
-
LLM_MODEL_REGISTRY[prefix] = cls
|
| 28 |
-
return cls
|
| 29 |
-
|
| 30 |
-
return wrapper
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
def get_llm_model(model_id: str, device="cpu", **kwargs) -> AbstractLLMModel:
|
| 34 |
-
for prefix, cls in LLM_MODEL_REGISTRY.items():
|
| 35 |
-
if model_id.startswith(prefix):
|
| 36 |
-
return cls(model_id, device=device, **kwargs)
|
| 37 |
-
raise ValueError(f"No LLM wrapper found for model: {model_id}")
|
| 38 |
|
| 39 |
|
| 40 |
-
@register_llm_model("
|
| 41 |
-
@register_llm_model("
|
| 42 |
-
@register_llm_model("meta-llama")
|
| 43 |
class HFTextGenerationLLM(AbstractLLMModel):
|
| 44 |
def __init__(
|
| 45 |
self, model_id: str, device: str = "cpu", cache_dir: str = "cache", **kwargs
|
|
@@ -53,6 +24,7 @@ class HFTextGenerationLLM(AbstractLLMModel):
|
|
| 53 |
device=0 if device == "cuda" else -1,
|
| 54 |
return_full_text=False,
|
| 55 |
token=hf_token,
|
|
|
|
| 56 |
**kwargs,
|
| 57 |
)
|
| 58 |
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
|
| 3 |
from transformers import pipeline
|
| 4 |
|
| 5 |
+
from .base import AbstractLLMModel
|
| 6 |
+
from .registry import register_llm_model
|
|
|
|
| 7 |
|
| 8 |
+
hf_token = os.getenv("HF_TOKEN")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
+
@register_llm_model("openai-community/")
|
| 12 |
+
@register_llm_model("google/gemma-")
|
| 13 |
+
@register_llm_model("meta-llama/Llama-")
|
| 14 |
class HFTextGenerationLLM(AbstractLLMModel):
|
| 15 |
def __init__(
|
| 16 |
self, model_id: str, device: str = "cpu", cache_dir: str = "cache", **kwargs
|
|
|
|
| 24 |
device=0 if device == "cuda" else -1,
|
| 25 |
return_full_text=False,
|
| 26 |
token=hf_token,
|
| 27 |
+
trust_remote_code=True,
|
| 28 |
**kwargs,
|
| 29 |
)
|
| 30 |
|
modules/llm/registry.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base import AbstractLLMModel
|
| 2 |
+
|
| 3 |
+
LLM_MODEL_REGISTRY = {}
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def register_llm_model(prefix: str):
    """Return a class decorator that registers an LLM wrapper under ``prefix``.

    The decorated class is stored in ``LLM_MODEL_REGISTRY[prefix]`` and
    returned unchanged, so several decorators can be stacked on one class.
    Registering the same prefix again replaces the earlier entry.

    Args:
        prefix: Model-id prefix the class should handle.

    Raises:
        TypeError: If the decorated object is not a subclass of
            ``AbstractLLMModel``.
    """

    def wrapper(cls):
        # Explicit check instead of `assert`: assertions are stripped under
        # `python -O`, which would let arbitrary objects into the registry.
        if not (isinstance(cls, type) and issubclass(cls, AbstractLLMModel)):
            raise TypeError(f"{cls} must inherit AbstractLLMModel")
        LLM_MODEL_REGISTRY[prefix] = cls
        return cls

    return wrapper
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_llm_model(model_id: str, device: str = "cpu", **kwargs) -> AbstractLLMModel:
    """Instantiate the registered wrapper whose prefix matches ``model_id``.

    When several registered prefixes match, the longest (most specific) one
    is used, so the result does not depend on registration order.

    Args:
        model_id: Model identifier to look up by prefix.
        device: Forwarded to the wrapper constructor as ``device``.
        **kwargs: Forwarded to the wrapper constructor.

    Returns:
        An instance of the matching registered class.

    Raises:
        ValueError: If no registered prefix matches ``model_id``.
    """
    matching = [prefix for prefix in LLM_MODEL_REGISTRY if model_id.startswith(prefix)]
    if not matching:
        raise ValueError(f"No LLM wrapper found for model: {model_id}")
    wrapper_cls = LLM_MODEL_REGISTRY[max(matching, key=len)]
    return wrapper_cls(model_id, device=device, **kwargs)
|
tests/__init__.py
ADDED
|
File without changes
|
tests/test_llm_infer.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.llm import get_llm_model
|
| 2 |
+
|
| 3 |
+
if __name__ == "__main__":
    # Manual smoke test: load each listed model through the registry and run
    # one generation. Uncomment the model ids you want to exercise.
    import traceback

    supported_llms = [
        # "MiniMaxAI/MiniMax-M1-80k",  # -> load with custom code
        # "Qwen/Qwen-1_8B",
        # "meta-llama/Llama-3.1-8B-Instruct",  # pending for approval
        # "tiiuae/Falcon-H1-1B-Base",
        # "tiiuae/Falcon-H1-3B-Instruct",
        # "tencent/Hunyuan-A13B-Instruct",  # -> load with custom code
        # "deepseek-ai/DeepSeek-R1-0528",
        # "openai-community/gpt2-xl",
        # "google/gemma-2-2b",
    ]
    prompt = "你好,今天你心情怎么样?"
    for model_id in supported_llms:
        try:
            print(f"Loading model: {model_id}")
            llm = get_llm_model(model_id, cache_dir="./.cache")
            result = llm.generate(prompt)
            print("=================")
            print(f"[{model_id}] LLM inference result:", result)
        except Exception as e:
            # Keep going so one failing model does not hide the others; show
            # the traceback instead of dropping into a debugger, which would
            # hang non-interactive runs.
            print(f"Failed to load model {model_id}: {e}")
            traceback.print_exc()
|