Coool2 committed
Commit 979a803 · 1 Parent(s): ec692d1

Update agent.py

Files changed (1):
  1. agent.py +63 -10
agent.py CHANGED
@@ -26,7 +26,6 @@ from llama_index.core.schema import ImageNode, TextNode
 # LlamaIndex specialized imports
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.llms.huggingface import HuggingFaceLLM
-from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
 from llama_index.readers.assemblyai import AssemblyAIAudioTranscriptReader
 from llama_index.readers.json import JSONReader
 from llama_index.readers.web import BeautifulSoupWebReader
@@ -121,16 +120,70 @@ def initialize_models(use_api_mode=False):
     print("Initializing models in non-API mode with local models...")
 
     try:
-        proj_llm = HuggingFaceMultiModal.from_model_name(
-            "Qwen/Qwen2.5-VL-7B-Instruct",
-            temperature=0.7,
-            top_p=0.9,
-            top_k=40,
-            max_new_tokens=5120,
-            device_map="auto",
-            model_kwargs={"torch_dtype": "auto"}
+        from typing import Any, Optional, List, Mapping
+        from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
+        from llama_index.core.llms.callbacks import llm_completion_callback
+        from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+        from qwen_vl_utils import process_vision_info
+        import torch
+
+        class QwenVL7BCustomLLM(CustomLLM):
+            context_window: int = 32768
+            num_output: int = 256
+            model_name: str = "Qwen/Qwen2.5-VL-7B-Instruct"
+
+            def __init__(self, device: str = "cuda"):
+                self.device = device
+                self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                    self.model_name, torch_dtype=torch.bfloat16, device_map="auto"
                 )
-
+                self.processor = AutoProcessor.from_pretrained(self.model_name)
+
+            @property
+            def metadata(self) -> LLMMetadata:
+                return LLMMetadata(
+                    context_window=self.context_window,
+                    num_output=self.num_output,
+                    model_name=self.model_name,
+                )
+
+            @llm_completion_callback()
+            def complete(self, prompt: str, image_paths: Optional[List[str]] = None, **kwargs: Any) -> CompletionResponse:
+                # Prepare messages for multimodal input
+                messages = [{"role": "user", "content": []}]
+                if image_paths:
+                    for path in image_paths:
+                        messages[0]["content"].append({"type": "image", "image": path})
+                messages[0]["content"].append({"type": "text", "text": prompt})
+
+                # Process inputs
+                text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+                image_inputs, video_inputs = process_vision_info(messages)
+                inputs = self.processor(
+                    text=[text],
+                    images=image_inputs,
+                    videos=video_inputs,
+                    padding=True,
+                    return_tensors="pt",
+                )
+                inputs = inputs.to(self.model.device)
+
+                # Generate output
+                generated_ids = self.model.generate(**inputs, max_new_tokens=self.num_output)
+                generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+                output_text = self.processor.batch_decode(
+                    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+                )[0]
+                return CompletionResponse(text=output_text)
+
+            @llm_completion_callback()
+            def stream_complete(self, prompt: str, image_paths: Optional[List[str]] = None, **kwargs: Any) -> CompletionResponseGen:
+                response = self.complete(prompt, image_paths)
+                for token in response.text:
+                    yield CompletionResponse(text=token, delta=token)
+
+
+        proj_llm = QwenVL7BCustomLLM()
 
         # Code LLM
         code_llm = HuggingFaceLLM(
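
For context, the new wrapper is meant to drop in wherever LlamaIndex expects an LLM: `complete` handles both text-only and image-grounded prompts, and `stream_complete` replays the finished completion character by character. Below is a minimal usage sketch, not part of the commit; the prompts and "figure.png" path are placeholders. One caveat worth hedging on: `CustomLLM` subclasses are Pydantic models, so depending on the LlamaIndex version the plain attribute assignments in `__init__` above may need to be declared as `PrivateAttr` fields for instantiation to succeed.

# Sketch only: exercises the QwenVL7BCustomLLM interface added in this commit.
llm = QwenVL7BCustomLLM()

# Text-only completion
print(llm.complete("Summarize this repository in one sentence.").text)

# Multimodal completion; image_paths is the keyword this class defines,
# and "figure.png" is a placeholder path.
print(llm.complete("What does this figure show?", image_paths=["figure.png"]).text)

# Character-level streaming, as implemented by stream_complete
for chunk in llm.stream_complete("Hello"):
    print(chunk.delta, end="")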