Utiric committed on
Commit
710ab17
·
verified ·
1 Parent(s): 7b02d7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -1,30 +1,30 @@
1
  import gradio as gr
2
- import spaces
3
  from transformers import AutoModelForCausalLM, AutoProcessor
4
  import torch
5
  from PIL import Image
6
- import subprocess
7
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
8
 
 
9
  models = {
10
- "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
11
-
 
12
  }
13
 
14
  processors = {
15
- "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
 
 
16
  }
17
 
18
  DESCRIPTION = "[Phi-3.5-vision Demo](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)"
19
 
20
  kwargs = {}
21
- kwargs['torch_dtype'] = torch.bfloat16
22
 
23
  user_prompt = '<|user|>\n'
24
  assistant_prompt = '<|assistant|>\n'
25
  prompt_suffix = "<|end|>\n"
26
 
27
- @spaces.GPU
28
  def run_example(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
29
  model = models[model_id]
30
  processor = processors[model_id]
@@ -32,15 +32,18 @@ def run_example(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instr
32
  prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
33
  image = Image.fromarray(image).convert("RGB")
34
 
35
- inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
36
- generate_ids = model.generate(**inputs,
37
- max_new_tokens=1000,
38
- eos_token_id=processor.tokenizer.eos_token_id,
39
- )
 
40
  generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
41
- response = processor.batch_decode(generate_ids,
42
- skip_special_tokens=True,
43
- clean_up_tokenization_spaces=False)[0]
 
 
44
  return response
45
 
46
  css = """
@@ -66,4 +69,4 @@ with gr.Blocks(css=css) as demo:
66
  submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
67
 
68
  demo.queue(api_open=False)
69
- demo.launch(debug=True, show_api=False)
 
1
  import gradio as gr
 
2
  from transformers import AutoModelForCausalLM, AutoProcessor
3
  import torch
4
  from PIL import Image
 
 
5
 
6
+ # Model ve işlemci yükleme
7
  models = {
8
+ "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained(
9
+ "microsoft/Phi-3.5-vision-instruct", trust_remote_code=True, torch_dtype="auto"
10
+ ).eval()
11
  }
12
 
13
  processors = {
14
+ "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained(
15
+ "microsoft/Phi-3.5-vision-instruct", trust_remote_code=True
16
+ )
17
  }
18
 
19
  DESCRIPTION = "[Phi-3.5-vision Demo](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)"
20
 
21
  kwargs = {}
22
+ kwargs['torch_dtype'] = torch.float32 # CPU üzerinde çalıştığı için bfloat16 yerine float32 kullanılıyor
23
 
24
  user_prompt = '<|user|>\n'
25
  assistant_prompt = '<|assistant|>\n'
26
  prompt_suffix = "<|end|>\n"
27
 
 
28
  def run_example(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
29
  model = models[model_id]
30
  processor = processors[model_id]
 
32
  prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
33
  image = Image.fromarray(image).convert("RGB")
34
 
35
+ inputs = processor(prompt, image, return_tensors="pt") # Cihaz belirtilmedi, varsayılan olarak CPU kullanılır
36
+ generate_ids = model.generate(
37
+ **inputs,
38
+ max_new_tokens=1000,
39
+ eos_token_id=processor.tokenizer.eos_token_id,
40
+ )
41
  generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
42
+ response = processor.batch_decode(
43
+ generate_ids,
44
+ skip_special_tokens=True,
45
+ clean_up_tokenization_spaces=False
46
+ )[0]
47
  return response
48
 
49
  css = """
 
69
  submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
70
 
71
  demo.queue(api_open=False)
72
+ demo.launch(debug=True, show_api=False)