Kohaku-Blueleaf
committed on
Commit
·
fdedf25
1
Parent(s):
c38e0e5
update inst
Browse files
app.py
CHANGED
@@ -306,10 +306,40 @@ def generate(
|
|
306 |
|
307 |
# --- Gradio UI Definition ---
|
308 |
with gr.Blocks(title="HDM Demo", theme=gr.themes.Soft()) as demo:
|
309 |
-
gr.Markdown("# HDM Demo")
|
310 |
gr.Markdown(
|
311 |
"### Enter a natural language prompt and/or specific tags to generate an image."
|
312 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
313 |
|
314 |
with gr.Row():
|
315 |
with gr.Column(scale=2):
|
|
|
306 |
|
307 |
# --- Gradio UI Definition ---
|
308 |
with gr.Blocks(title="HDM Demo", theme=gr.themes.Soft()) as demo:
|
309 |
+
gr.Markdown("# HDM Early Demo")
|
310 |
gr.Markdown(
|
311 |
"### Enter a natural language prompt and/or specific tags to generate an image."
|
312 |
)
|
313 |
+
with gr.Accordion("Introduction", open=False):
|
314 |
+
gr.Markdown("""
|
315 |
+
# HDM: HomeDiffusion Model Project
|
316 |
+
HDM is a project to implement a series of generative models that can be pretrained at home.
|
317 |
+
|
318 |
+
## About this Demo
|
319 |
+
This demo uses a checkpoint taken during training to demonstrate the functionality of HDM.
|
320 |
+
It is not the final model yet.
|
321 |
+
|
322 |
+
## Usage
|
323 |
+
This early model was trained on an anime image set only,
|
324 |
+
so you should expect to see anime style images only in this demo.
|
325 |
+
|
326 |
+
For prompting, enter Danbooru tags in the "Tag Prompt" box, comma-separated and with the underscores removed.
|
327 |
+
Enter a natural language prompt in the "Natural Language Prompt" box and a negative prompt in the "Negative Prompt" box.
|
328 |
+
|
329 |
+
If you don't want to spend much effort on prompting, keep "Enable TIPO" selected.
|
330 |
+
|
331 |
+
If you don't want to apply any pre-defined format, unselect "Enable TIPO" and "Enable Format".
|
332 |
+
|
333 |
+
## Model Spec
|
334 |
+
- Backbone: 342M custom DiT (UViT-modified) architecture
|
335 |
+
- Text Encoder: Qwen3 0.6B (596M)
|
336 |
+
- VAE: EQ-SDXL-VAE, an EQ-VAE fine-tuned SDXL VAE.
|
337 |
+
|
338 |
+
## Pretraining Dataset
|
339 |
+
- Danbooru 2023 (latest id around 8M)
|
340 |
+
- Pixiv famous artist set
|
341 |
+
- Some PVC figure photos
|
342 |
+
""")
|
343 |
|
344 |
with gr.Row():
|
345 |
with gr.Column(scale=2):
|