jhansss committed
Commit 7974242 · 1 Parent(s): ced727c
modules/llm/__init__.py CHANGED
@@ -1,7 +1,7 @@
  from .base import AbstractLLMModel
  from .registry import LLM_MODEL_REGISTRY, get_llm_model, register_llm_model
  from .hf_pipeline import HFTextGenerationLLM
- from .qwen import QwenLLM
+ from .qwen3 import Qwen3LLM
  from .gemini import GeminiLLM

  __all__ = [
modules/llm/qwen3.py ADDED
@@ -0,0 +1,37 @@
+ from .base import AbstractLLMModel
+ from .registry import register_llm_model
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+ @register_llm_model("Qwen/Qwen3-")
+ class Qwen3LLM(AbstractLLMModel):
+     def __init__(
+         self, model_id: str, device: str = "auto", cache_dir: str = "cache", **kwargs
+     ):
+         super().__init__(model_id, device, cache_dir, **kwargs)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_id, device_map=device, torch_dtype="auto", cache_dir=cache_dir
+         ).eval()
+         self.tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
+
+     def generate(self, prompt: str, enable_thinking: bool = True, max_new_tokens: int = 32768, **kwargs) -> str:
+         messages = [{"role": "user", "content": prompt}]
+         text = self.tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True,
+             enable_thinking=enable_thinking,
+         )
+         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+         generated_ids = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)
+         output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()
+         # parse thinking content
+         if enable_thinking:
+             try:
+                 # rindex finding 151668 (</think>)
+                 index = len(output_ids) - output_ids[::-1].index(151668)
+             except ValueError:
+                 index = 0
+             output_ids = output_ids[index:]
+
+         return self.tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
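For context, a minimal usage sketch of the new class through the registry. It assumes `get_llm_model` matches a model id against the registered "Qwen/Qwen3-" prefix and forwards keyword arguments to `Qwen3LLM.__init__`; the checkpoint "Qwen/Qwen3-8B" is only an illustrative choice, not pinned by this commit.

```python
# Hypothetical usage sketch, not part of this commit.
# Assumes get_llm_model resolves the "Qwen/Qwen3-" prefix and passes
# device/cache_dir through to Qwen3LLM; "Qwen/Qwen3-8B" is illustrative.
from modules.llm import get_llm_model

llm = get_llm_model("Qwen/Qwen3-8B", device="auto", cache_dir="cache")
# enable_thinking=False disables the <think> block, so only the final answer is returned
print(llm.generate("Give a one-sentence summary of transformers.", enable_thinking=False))
```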
tests/test_llm_infer.py CHANGED
@@ -3,8 +3,8 @@ from modules.llm import get_llm_model
  if __name__ == "__main__":
      supported_llms = [
          # "MiniMaxAI/MiniMax-M1-80k", # -> load with custom code
-         # "Qwen/Qwen-1_8B",
-         # "meta-llama/Llama-3.1-8B-Instruct", # pending for approval
+         # "Qwen/Qwen3-8B",
+         # "meta-llama/Llama-3.1-8B-Instruct",
          # "tiiuae/Falcon-H1-1B-Base",
          # "tiiuae/Falcon-H1-3B-Instruct",
          # "tencent/Hunyuan-A13B-Instruct", # -> load with custom code