peterpeter8585 committed on
Commit 819f86e · verified · 1 Parent(s): 00eb31b

Update app.py

Files changed (1)
  1. app.py +404 -89
app.py CHANGED
@@ -74,97 +478,8 @@ from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
74
  from langchain_core.runnables import run_in_executor
75
  from transformers import AutoTokenizer, AutoModelForCausalLM
76
  import torch
77
- class Chatchat(BaseChatModel):
78
-
79
- model_name: str = "peterpeter8585/deepseek_1"
80
- tokenizer : AutoTokenizer = None
81
- model: AutoModelForCausalLM = None
82
- model_path: str = None
83
-
84
- def __init__(self, model_path, **kwargs: Any) -> None:
85
- super().__init__(**kwargs)
86
- if model_path is not None:
87
- self.model_name = model_path
88
-
89
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True)
90
- self.model = AutoModelForCausalLM.from_pretrained(
91
- self.model_name, trust_remote_code=True)
92
- self.model=self
93
-
94
-
95
- def _call(
96
- self,
97
- prompt: str,
98
- stop: Optional[List[str]] = None,
99
- run_manager: Optional[CallbackManagerForLLMRun] = None,
100
- **kwargs: Any,
101
- ) -> str:
102
- # Load and preprocess the image
103
- messages = [
104
- {"role": "system", "content": "You are Chatchat.A helpful assistant at code."},
105
- {"role": "user", "content": prompt}
106
- ]
107
-
108
- text = self.tokenizer.apply_chat_template(
109
- messages,
110
- tokenize=False,
111
- add_generation_prompt=True
112
- )
113
- model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
114
- generated_ids = self.model.generate(
115
- **model_inputs,
116
- max_new_tokens=512
117
- )
118
- generated_ids = [
119
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
120
- ]
121
-
122
- response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
123
-
124
- return response
125
-
126
- async def _acall(
127
- self,
128
- prompt: str,
129
- stop: Optional[List[str]] = None,
130
- run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
131
- **kwargs: Any,
132
- ) -> str:
133
- # Implement the async logic to generate a response from the model
134
- return await run_in_executor(
135
- None,
136
- self._call,
137
- prompt,
138
- stop,
139
- run_manager.get_sync() if run_manager else None,
140
- **kwargs,
141
- )
142
 
143
- @property
144
- def _llm_type(self) -> str:
145
- return "custom-llm-chat"
146
-
147
- @property
148
- def _identifying_params(self) -> Dict[str, Any]:
149
- return {"model_name": self.model_name}
150
 
151
- def _generate(
152
- self,
153
- messages: List[BaseMessage],
154
- stop: Optional[List[str]] = None,
155
- run_manager: Optional[CallbackManagerForLLMRun] = None,
156
- **kwargs: Any,
157
- ) -> ChatResult:
158
- # Assumes the first message contains the prompt and the image path is in metadata
159
- prompt = messages[0].content
160
- response_text = self._call(prompt, stop, run_manager, **kwargs)
161
-
162
- # Create AIMessage with the response
163
- ai_message = AIMessage(content=response_text)
164
- return ChatResult(generations=[ChatGeneration(message=ai_message)])
165
-
166
-
167
- llm=Chatchat(model_path=None)
168
  #from transformers import pipeline,AutoModelForCausalLM as M,AutoTokenizer as T
169
  #m=M.from_pretrained("peterpeter8585/syai4.3")
170
  #t=T.from_pretrained("peterpeter8585/syai4.3")
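The deleted `llm=Chatchat(model_path=None)` is what the vendored HuggingFacePipeline class added in the next hunk takes over from. A minimal sketch of equivalent wiring, assuming the published langchain_huggingface package (which the vendored copy below mirrors); the model id and generation settings are illustrative, not taken from this commit:

# Sketch: rebuild a chat-capable `llm` after removing Chatchat.
# ChatHuggingFace applies the model's chat template on top of a pipeline-backed LLM.
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

base_llm = HuggingFacePipeline.from_model_id(
    model_id="Qwen/Qwen2.5-0.5B-Instruct",   # illustrative; any chat-templated causal LM
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 128},
)
llm = ChatHuggingFace(llm=base_llm)          # chat interface for the agent prompt in app.py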
@@ -10,7 +10,411 @@ from langchain.schema import SystemMessage as SM,HumanMessage as HM, AIMessage a
10
  from langchain import hub
11
  import os
12
  import torch
13
+ # NOTE: the upstream `from __future__ import annotations` is dropped here because a
+ # __future__ import must be the first statement in a file (app.py already has imports
+ # above this point); the one forward reference below is quoted instead.
14
 
15
+ import importlib.util
16
+ import logging
17
+ from typing import Any, Dict, Iterator, List, Mapping, Optional
18
+
19
+ from langchain_core.callbacks import CallbackManagerForLLMRun
20
+ from langchain_core.language_models.llms import BaseLLM
21
+ from langchain_core.outputs import Generation, GenerationChunk, LLMResult
22
+ from pydantic import ConfigDict, model_validator
23
+
24
+ # Absolute import (assumes the installed langchain_huggingface package exposes these
+ # helpers); the upstream package-relative `..utils.import_utils` fails in a standalone app.py.
+ from langchain_huggingface.utils.import_utils import (
25
+ IMPORT_ERROR,
26
+ is_ipex_available,
27
+ is_openvino_available,
28
+ is_optimum_intel_available,
29
+ is_optimum_intel_version,
30
+ )
31
+
32
+ DEFAULT_MODEL_ID = "gpt2"
33
+ DEFAULT_TASK = "text-generation"
34
+ VALID_TASKS = (
35
+ "text2text-generation",
36
+ "text-generation",
37
+ "summarization",
38
+ "translation",
39
+ )
40
+ DEFAULT_BATCH_SIZE = 4
41
+ _MIN_OPTIMUM_VERSION = "1.21"
42
+
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ class HuggingFacePipeline(BaseLLM):
48
+ """HuggingFace Pipeline API.
49
+
50
+ To use, you should have the ``transformers`` python package installed.
51
+
52
+ Only supports `text-generation`, `text2text-generation`, `summarization` and
53
+ `translation` for now.
54
+
55
+ Example using from_model_id:
56
+ .. code-block:: python
57
+
58
+ from langchain_huggingface import HuggingFacePipeline
59
+ hf = HuggingFacePipeline.from_model_id(
60
+ model_id="gpt2",
61
+ task="text-generation",
62
+ pipeline_kwargs={"max_new_tokens": 10},
63
+ )
64
+ Example passing pipeline in directly:
65
+ .. code-block:: python
66
+
67
+ from langchain_huggingface import HuggingFacePipeline
68
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
69
+
70
+ model_id = "gpt2"
71
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
72
+ model = AutoModelForCausalLM.from_pretrained(model_id)
73
+ pipe = pipeline(
74
+ "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10
75
+ )
76
+ hf = HuggingFacePipeline(pipeline=pipe)
77
+ """
78
+
79
+ pipeline: Any = None #: :meta private:
80
+ model_id: Optional[str] = None
81
+ """The model name. If not set explicitly by the user,
82
+ it will be inferred from the provided pipeline (if available).
83
+ If neither is provided, the DEFAULT_MODEL_ID will be used."""
84
+ model_kwargs: Optional[dict] = None
85
+ """Keyword arguments passed to the model."""
86
+ pipeline_kwargs: Optional[dict] = None
87
+ """Keyword arguments passed to the pipeline."""
88
+ batch_size: int = DEFAULT_BATCH_SIZE
89
+ """Batch size to use when passing multiple documents to generate."""
90
+
91
+ model_config = ConfigDict(
92
+ extra="forbid",
93
+ )
94
+
95
+ @model_validator(mode="before")
96
+ @classmethod
97
+ def pre_init_validator(cls, values: Dict[str, Any]) -> Dict[str, Any]:
98
+ """Ensure model_id is set either by pipeline or user input."""
99
+ if "model_id" not in values:
100
+ if "pipeline" in values and values["pipeline"]:
101
+ values["model_id"] = values["pipeline"].model.name_or_path
102
+ else:
103
+ values["model_id"] = DEFAULT_MODEL_ID
104
+ return values
105
+
106
+ @classmethod
107
+ def from_model_id(
108
+ cls,
109
+ model_id: str,
110
+ task: str,
111
+ backend: str = "default",
112
+ device: Optional[int] = None,
113
+ device_map: Optional[str] = None,
114
+ model_kwargs: Optional[dict] = None,
115
+ pipeline_kwargs: Optional[dict] = None,
116
+ batch_size: int = DEFAULT_BATCH_SIZE,
117
+ **kwargs: Any,
118
+ ) -> "HuggingFacePipeline":  # quoted forward reference (no __future__ annotations import)
119
+ """Construct the pipeline object from model_id and task."""
120
+ try:
121
+ from transformers import ( # type: ignore[import]
122
+ AutoModelForCausalLM,
123
+ AutoModelForSeq2SeqLM,
124
+ AutoTokenizer,
125
+ )
126
+ from transformers import pipeline as hf_pipeline # type: ignore[import]
127
+
128
+ except ImportError:
129
+ raise ValueError(
130
+ "Could not import transformers python package. "
131
+ "Please install it with `pip install transformers`."
132
+ )
133
+
134
+ _model_kwargs = model_kwargs.copy() if model_kwargs else {}
135
+ if device_map is not None:
136
+ if device is not None:
137
+ raise ValueError(
138
+ "Both `device` and `device_map` are specified. "
139
+ "`device` will override `device_map`. "
140
+ "You will most likely encounter unexpected behavior. "
141
+ "Please remove `device` and keep "
142
+ "`device_map`."
143
+ )
144
+
145
+ if "device_map" in _model_kwargs:
146
+ raise ValueError("`device_map` is already specified in `model_kwargs`.")
147
+
148
+ _model_kwargs["device_map"] = device_map
149
+ tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
150
+
151
+ if backend in {"openvino", "ipex"}:
152
+ if task not in VALID_TASKS:
153
+ raise ValueError(
154
+ f"Got invalid task {task}, "
155
+ f"currently only {VALID_TASKS} are supported"
156
+ )
157
+
158
+ err_msg = f'Backend: {backend} {IMPORT_ERROR.format(f"optimum[{backend}]")}'
159
+ if not is_optimum_intel_available():
160
+ raise ImportError(err_msg)
161
+
162
+ # TODO: upgrade _MIN_OPTIMUM_VERSION to 1.22 after release
163
+ min_optimum_version = (
164
+ "1.22"
165
+ if backend == "ipex" and task != "text-generation"
166
+ else _MIN_OPTIMUM_VERSION
167
+ )
168
+ if is_optimum_intel_version("<", min_optimum_version):
169
+ raise ImportError(
170
+ f"Backend: {backend} requires optimum-intel>="
171
+ f"{min_optimum_version}. You can install it with pip: "
172
+ "`pip install --upgrade --upgrade-strategy eager "
173
+ f"`optimum[{backend}]`."
174
+ )
175
+
176
+ if backend == "openvino":
177
+ if not is_openvino_available():
178
+ raise ImportError(err_msg)
179
+
180
+ from optimum.intel import ( # type: ignore[import]
181
+ OVModelForCausalLM,
182
+ OVModelForSeq2SeqLM,
183
+ )
184
+
185
+ model_cls = (
186
+ OVModelForCausalLM
187
+ if task == "text-generation"
188
+ else OVModelForSeq2SeqLM
189
+ )
190
+ else:
191
+ if not is_ipex_available():
192
+ raise ImportError(err_msg)
193
+
194
+ if task == "text-generation":
195
+ from optimum.intel import (
196
+ IPEXModelForCausalLM, # type: ignore[import]
197
+ )
198
+
199
+ model_cls = IPEXModelForCausalLM
200
+ else:
201
+ from optimum.intel import (
202
+ IPEXModelForSeq2SeqLM, # type: ignore[import]
203
+ )
204
+
205
+ model_cls = IPEXModelForSeq2SeqLM
206
+
207
+ else:
208
+ model_cls = (
209
+ AutoModelForCausalLM
210
+ if task == "text-generation"
211
+ else AutoModelForSeq2SeqLM
212
+ )
213
+
214
+ model = model_cls.from_pretrained(model_id, **_model_kwargs)
215
+ model = torch.compile(model, mode="max-autotune")  # compile the loaded model for faster generation
216
+
217
+ if tokenizer.pad_token is None:
218
+ if model.config.pad_token_id is not None:
219
+ tokenizer.pad_token_id = model.config.pad_token_id
220
+ elif model.config.eos_token_id is not None and isinstance(
221
+ model.config.eos_token_id, int
222
+ ):
223
+ tokenizer.pad_token_id = model.config.eos_token_id
224
+ elif tokenizer.eos_token_id is not None:
225
+ tokenizer.pad_token_id = tokenizer.eos_token_id
226
+ else:
227
+ tokenizer.add_special_tokens({"pad_token": "[PAD]"})
228
+
229
+ if (
230
+ (
231
+ getattr(model, "is_loaded_in_4bit", False)
232
+ or getattr(model, "is_loaded_in_8bit", False)
233
+ )
234
+ and device is not None
235
+ and backend == "default"
236
+ ):
237
+ logger.warning(
238
+ f"Setting the `device` argument to None from {device} to avoid "
239
+ "the error caused by attempting to move the model that was already "
240
+ "loaded on the GPU using the Accelerate module to the same or "
241
+ "another device."
242
+ )
243
+ device = None
244
+
245
+ if (
246
+ device is not None
247
+ and importlib.util.find_spec("torch") is not None
248
+ and backend == "default"
249
+ ):
250
+ import torch
251
+
252
+ cuda_device_count = torch.cuda.device_count()
253
+ if device < -1 or (device >= cuda_device_count):
254
+ raise ValueError(
255
+ f"Got device=={device}, "
256
+ f"device is required to be within [-1, {cuda_device_count})"
257
+ )
258
+ if device_map is not None and device < 0:
259
+ device = None
260
+ if device is not None and device < 0 and cuda_device_count > 0:
261
+ logger.warning(
262
+ "Device has %d GPUs available. "
263
+ "Provide device={deviceId} to `from_model_id` to use available "
264
+ "GPUs for execution. deviceId is -1 (default) for CPU and "
265
+ "can be a positive integer associated with CUDA device id.",
266
+ cuda_device_count,
267
+ )
268
+ if device is not None and device_map is not None and backend == "openvino":
269
+ logger.warning("Please set device for OpenVINO through: `model_kwargs`")
270
+ if "trust_remote_code" in _model_kwargs:
271
+ _model_kwargs = {
272
+ k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
273
+ }
274
+ _pipeline_kwargs = pipeline_kwargs or {}
275
+ pipeline = hf_pipeline(
276
+ task=task,
277
+ model=model,
278
+ tokenizer=tokenizer,
279
+ device=device,
280
+ batch_size=batch_size,
281
+ model_kwargs=_model_kwargs,
282
+ **_pipeline_kwargs,
283
+ )
284
+ if pipeline.task not in VALID_TASKS:
285
+ raise ValueError(
286
+ f"Got invalid task {pipeline.task}, "
287
+ f"currently only {VALID_TASKS} are supported"
288
+ )
289
+ return cls(
290
+ pipeline=pipeline,
291
+ model_id=model_id,
292
+ model_kwargs=_model_kwargs,
293
+ pipeline_kwargs=_pipeline_kwargs,
294
+ batch_size=batch_size,
295
+ **kwargs,
296
+ )
297
+
298
+ @property
299
+ def _identifying_params(self) -> Mapping[str, Any]:
300
+ """Get the identifying parameters."""
301
+ return {
302
+ "model_id": self.model_id,
303
+ "model_kwargs": self.model_kwargs,
304
+ "pipeline_kwargs": self.pipeline_kwargs,
305
+ }
306
+
307
+ @property
308
+ def _llm_type(self) -> str:
309
+ return "huggingface_pipeline"
310
+
311
+ def _generate(
312
+ self,
313
+ prompts: List[str],
314
+ stop: Optional[List[str]] = None,
315
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
316
+ **kwargs: Any,
317
+ ) -> LLMResult:
318
+ # List to hold all results
319
+ text_generations: List[str] = []
320
+ pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
321
+ skip_prompt = kwargs.get("skip_prompt", False)
322
+
323
+ for i in range(0, len(prompts), self.batch_size):
324
+ batch_prompts = prompts[i : i + self.batch_size]
325
+
326
+ # Process batch of prompts
327
+ responses = self.pipeline(
328
+ batch_prompts,
329
+ **pipeline_kwargs,
330
+ )
331
+
332
+ # Process each response in the batch
333
+ for j, response in enumerate(responses):
334
+ if isinstance(response, list):
335
+ # if model returns multiple generations, pick the top one
336
+ response = response[0]
337
+
338
+ if self.pipeline.task == "text-generation":
339
+ text = response["generated_text"]
340
+ elif self.pipeline.task == "text2text-generation":
341
+ text = response["generated_text"]
342
+ elif self.pipeline.task == "summarization":
343
+ text = response["summary_text"]
344
+ elif self.pipeline.task in "translation":
345
+ text = response["translation_text"]
346
+ else:
347
+ raise ValueError(
348
+ f"Got invalid task {self.pipeline.task}, "
349
+ f"currently only {VALID_TASKS} are supported"
350
+ )
351
+ if skip_prompt:
352
+ text = text[len(batch_prompts[j]) :]
353
+ # Append the processed text to results
354
+ text_generations.append(text)
355
+
356
+ return LLMResult(
357
+ generations=[[Generation(text=text)] for text in text_generations]
358
+ )
359
+
360
+ def _stream(
361
+ self,
362
+ prompt: str,
363
+ stop: Optional[List[str]] = None,
364
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
365
+ **kwargs: Any,
366
+ ) -> Iterator[GenerationChunk]:
367
+ from threading import Thread
368
+
369
+ import torch
370
+ from transformers import (
371
+ StoppingCriteria,
372
+ StoppingCriteriaList,
373
+ TextIteratorStreamer,
374
+ )
375
+
376
+ pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
377
+ skip_prompt = kwargs.get("skip_prompt", True)
378
+
379
+ if stop is not None:
380
+ stop = self.pipeline.tokenizer.convert_tokens_to_ids(stop)
381
+ stopping_ids_list = stop or []
382
+
383
+ class StopOnTokens(StoppingCriteria):
384
+ def __call__(
385
+ self,
386
+ input_ids: torch.LongTensor,
387
+ scores: torch.FloatTensor,
388
+ **kwargs: Any,
389
+ ) -> bool:
390
+ for stop_id in stopping_ids_list:
391
+ if input_ids[0][-1] == stop_id:
392
+ return True
393
+ return False
394
+
395
+ stopping_criteria = StoppingCriteriaList([StopOnTokens()])
396
+
397
+ streamer = TextIteratorStreamer(
398
+ self.pipeline.tokenizer,
399
+ timeout=60.0,
400
+ skip_prompt=skip_prompt,
401
+ skip_special_tokens=True,
402
+ )
403
+ generation_kwargs = dict(
404
+ text_inputs=prompt,
405
+ streamer=streamer,
406
+ stopping_criteria=stopping_criteria,
407
+ **pipeline_kwargs,
408
+ )
409
+ t1 = Thread(target=self.pipeline, kwargs=generation_kwargs)
410
+ t1.start()
411
+
412
+ for char in streamer:
413
+ chunk = GenerationChunk(text=char)
414
+ if run_manager:
415
+ run_manager.on_llm_new_token(chunk.text, chunk=chunk)
416
+
417
+ yield chunk
418
 
419
  from langchain_core.prompts.chat import ChatPromptTemplate, MessagesPlaceholder
420
  system = '''Respond to the human as helpfully and accurately as possible. You have access to the following tools:
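With the block above in app.py, the vendored class behaves like any other LangChain LLM. A minimal usage sketch, assuming the HuggingFacePipeline class defined above is in scope; the model id and generation settings are illustrative:

# Build the pipeline-backed LLM via the `from_model_id` constructor above, then run a
# single prompt (`invoke`) and a small batch (`generate`, which drives the batched
# `_generate` implementation).
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",                        # illustrative model id
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 32},
)

print(llm.invoke("def fibonacci(n):"))
result = llm.generate(["Hello", "Bonjour"])
print([gen[0].text for gen in result.generations])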
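The `_stream` method above runs the pipeline in a background thread and forwards text pieces from transformers' TextIteratorStreamer, so incremental output is available through the standard `stream` API. A sketch of that path, again assuming the vendored class and an illustrative model id:

# Token-by-token streaming: `stream` calls the `_stream` method above, which starts the
# pipeline in a Thread and yields chunks as TextIteratorStreamer produces them.
streaming_llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",                        # illustrative model id
    task="text-generation",
)
for piece in streaming_llm.stream(
    "Write one sentence about tokenizers.",
    pipeline_kwargs={"max_new_tokens": 64},
):
    print(piece, end="", flush=True)
print()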