File size: 7,570 Bytes
34ffddc
aa6782f
 
f77d097
eb04357
 
aa6782f
 
 
 
 
34ffddc
f77d097
bca73d7
 
 
 
 
 
 
 
 
 
 
 
 
 
aa6782f
81fc7b1
aa6782f
eb04357
aa6782f
 
 
 
 
eb04357
aa6782f
 
6b13d30
eb04357
6b13d30
 
 
 
 
 
 
 
eb04357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5994dcd
 
eb04357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa6782f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e00c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa6782f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import os
import tempfile
from threading import Thread

import gradio as gr
import numpy as np
import torch
from qwen_vl_utils import process_vision_info
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from transformers import TextIteratorStreamer

# 3D mesh dependencies
import trimesh
from trimesh.exchange.gltf import export_glb


# HTML header shown at the top of the demo page (rendered through gr.Markdown
# in the Blocks layout further down in this file).
# NOTE(review): the text advertises LLaMA-Mesh and its 4096/8k token limits,
# while the model loaded in this file is Qwen/Qwen2.5-VL-3B-Instruct — confirm
# that the branding and the stated limits are still intended.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">LLaMA-Mesh</h1>
<div>
<a style="display:inline-block" href="https://research.nvidia.com/labs/toronto-ai/LLaMA-Mesh/"><img src='https://img.shields.io/badge/public_website-8A2BE2'></a>
<a style="display:inline-block; margin-left: .5em" href="https://github.com/nv-tlabs/LLaMA-Mesh"><img src='https://img.shields.io/github/stars/nv-tlabs/LLaMA-Mesh?style=social'/></a>
</div>
<p>LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models.<a style="display:inline-block" href="https://research.nvidia.com/labs/toronto-ai/LLaMA-Mesh/">[Project Page]</a> <a style="display:inline-block" href="https://github.com/nv-tlabs/LLaMA-Mesh">[Code]</a></p>
<p> Notice: (1) This demo supports up to 4096 tokens due to computational limits, while our full model supports 8k tokens. This limitation may result in incomplete generated meshes. To experience the full 8k token context, please run our model locally.</p>
<p>(2) We only support generating a single mesh per dialog round. To generate another mesh, click the "clear" button and start a new dialog.</p>
<p>(3) If the LLM refuses to generate a 3D mesh, try adding more explicit instructions to the prompt, such as "create a 3D model of a table <strong>in OBJ format</strong>." A more effective approach is to request the mesh generation at the start of the dialog.</p>
</div>
'''
# --------- Configuration & Model Loading ---------
# Model identifier handed to both from_pretrained() calls below.
MODEL_DIR = "Qwen/Qwen2.5-VL-3B-Instruct"
# Load the Qwen2.5-VL model and its processor once, at import time; the chat
# function in this file reads both as module-level globals.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,  # request half-precision weights
    device_map="auto",  # device placement is delegated to the library
    # NOTE(review): trust_remote_code allows code shipped with the checkpoint
    # to run; presumably unnecessary for a model class imported directly from
    # transformers — confirm before keeping it enabled.
    trust_remote_code=True
)
# The processor supplies the chat template and input tensors, and exposes the
# tokenizer (processor.tokenizer) used when streaming decoded text.
processor = AutoProcessor.from_pretrained(MODEL_DIR)

# --------- Chat Inference Function ---------
def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
    """Stream the model's reply to ``message`` as progressively longer strings.

    Generation runs in a background thread; this generator reads decoded text
    from a ``TextIteratorStreamer`` and yields the accumulated reply after
    every new chunk, which is the shape ``gr.ChatInterface`` expects from a
    streaming ``fn``.

    Args:
        message: The user's prompt for the current turn.
        history: Prior turns as supplied by ``gr.ChatInterface``. Accepted for
            the calling convention only; it is not forwarded to the model, so
            every request is answered as a single-turn conversation.
        temperature: Sampling temperature. Any value ``<= 0`` selects greedy
            decoding.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The reply text generated so far (the full prefix, not just the delta).
    """
    # Single-turn, text-only conversation in the multimodal message format.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": message},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    # The streamer decodes with the tokenizer bundled inside the processor.
    # skip_prompt drops the echoed input; timeout bounds the wait per chunk.
    streamer = TextIteratorStreamer(
        processor.tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # No explicit eos_token_id is passed: the original referenced an undefined
    # ``terminators`` name (NameError on first call). Leaving it out lets
    # generate() fall back to the stop tokens in the model's generation config.
    gen_kwargs = dict(
        **inputs,  # input_ids, attention_mask, and any vision tensors
        streamer=streamer,
        max_new_tokens=max_new_tokens,
    )
    if temperature > 0:
        gen_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_k=1024,
            top_p=0.1,
        )
    else:
        # Greedy decoding: sampling-only knobs (temperature/top_k/top_p) are
        # deliberately omitted so generate() is not handed a zero temperature.
        gen_kwargs["do_sample"] = False

    # Run generation in the background so this thread can stream the output.
    Thread(target=model.generate, kwargs=gen_kwargs).start()

    # Accumulate chunks and yield the full text so far after each one.
    buffer = []
    for chunk in streamer:
        buffer.append(chunk)
        yield "".join(buffer)
        
# --------- 3D Mesh Coloring Function ---------
def apply_gradient_color(mesh_text: str) -> str:
    """Color an OBJ mesh with a Y-axis gradient and export it as a GLB file.

    Each vertex gets an opaque RGBA color interpolated along its Y coordinate:
    the lowest vertices are blue (0, 0, 1), the highest are red (1, 0, 0).
    A mesh whose vertices all share one Y value is colored uniformly blue.

    Args:
        mesh_text: The mesh in Wavefront OBJ text form.

    Returns:
        Path of the GLB file written to the temporary directory. The caller
        owns this file; it is not deleted here.

    Raises:
        ValueError: If the parsed mesh has no vertices.
    """
    # trimesh loads from a path here, so stage the OBJ text in a temp file.
    with tempfile.NamedTemporaryFile(suffix=".obj", delete=False) as tmp:
        tmp.write(mesh_text.encode("utf-8"))
        obj_path = tmp.name

    try:
        mesh = trimesh.load_mesh(obj_path, file_type="obj")
    finally:
        # The staged OBJ is only needed for loading; do not leak it.
        os.remove(obj_path)

    vertices = mesh.vertices
    if len(vertices) == 0:
        raise ValueError("mesh_text contains no vertices")

    ys = vertices[:, 1]
    y_min = ys.min()
    y_span = ys.max() - y_min
    if y_span > 0:
        y_norm = (ys - y_min) / y_span
    else:
        # Flat mesh (all vertices at the same height): avoid 0/0 -> NaN colors.
        y_norm = np.zeros(len(vertices))

    colors = np.zeros((len(vertices), 4))
    colors[:, 0] = y_norm        # red grows with height
    colors[:, 2] = 1 - y_norm    # blue fades with height
    colors[:, 3] = 1.0           # fully opaque
    mesh.visual.vertex_colors = colors

    # Swap only the file extension; str.replace could also hit a directory name.
    glb_path = os.path.splitext(obj_path)[0] + ".glb"
    with open(glb_path, "wb") as f:
        f.write(export_glb(mesh))
    return glb_path

# --------- Gradio Interface ---------
# Custom stylesheet passed to gr.Blocks(css=...): centers every <h1>.
css = """
h1 { text-align: center; }
"""
# HTML used as the chatbot's placeholder (see the gr.Chatbot(...) call below).
# NOTE(review): the text invites image generation, but the chat function in
# this file only sends text to the model — confirm the wording.
PLACEHOLDER = (
    "<div style='padding:30px;text-align:center;display:flex;flex-direction:column;align-items:center;'>"
    "<h1 style='font-size:28px;opacity:0.55;'>Qwen2.5-VL Local Chat</h1>"
    "<p style='font-size:18px;opacity:0.65;'>Ask anything or generate images!</p></div>"
)

# Chat display component; created up front and handed to gr.ChatInterface below.
chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')

# Page layout: description header on top, chat interface underneath.
# NOTE(review): apply_gradient_color() is defined in this file but nothing in
# this layout calls it, so no 3D preview is produced — confirm whether a
# viewer panel was meant to sit next to the chat column.
with gr.Blocks(fill_height=True, css=css) as demo:
    with gr.Column():
        gr.Markdown(DESCRIPTION)
        # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
        with gr.Row():
            with gr.Column(scale=3):
                gr.ChatInterface(
                    fn=chat_qwen_vl,
                    chatbot=chatbot,
                    fill_height=True,
                    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
                    # Extra inputs are passed to chat_qwen_vl after (message, history),
                    # i.e. as temperature and max_new_tokens. Both sliders are
                    # non-interactive, so users cannot change them and the values
                    # set here (0.9 and 4096) take the place of the function's own
                    # defaults (0.1 and 1024).
                    additional_inputs=[
                        gr.Slider(minimum=0,
                                maximum=1, 
                                step=0.1,
                                value=0.9, 
                                label="Temperature", 
                                interactive = False,
                                render=False),
                        gr.Slider(minimum=128, 
                                maximum=4096,
                                step=1,
                                value=4096, 
                                label="Max new tokens", 
                                interactive = False,
                                render=False),
                        ],
                    # Example prompts offered in the UI (one-element lists: message only).
                    examples=[
                        ['Create a 3D model of a wooden hammer'],
                        ['Create a 3D model of a pyramid in obj format'],
                        ['Create a 3D model of a cabinet.'],
                        ['Create a low poly 3D model of a coffe cup'],
                        ['Create a 3D model of a table.'],
                        ["Create a low poly 3D model of a tree."],
                        ['Write a python code for sorting.'],
                        ['How to setup a human base on Mars? Give short answer.'],
                        ['Explain theory of relativity to me like I’m 8 years old.'],
                        ['What is 9,000 * 9,000?'],
                        ['Create a 3D model of a soda can.'],
                        ['Create a 3D model of a sword.'],
                        ['Create a 3D model of a wooden barrel'],
                        ['Create a 3D model of a chair.']
                        ],
                    # Examples are not pre-computed at startup.
                    cache_examples=False,
                                )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()