Sync from GitHub
Browse files
app.py
CHANGED
@@ -93,11 +93,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
93 |
with gr.Column(scale=1):
|
94 |
submit_btn = gr.Button("Estimate Memory ☁", variant="primary", scale=1)
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
with gr.Accordion("Generated LLM Prompt (for debugging)", open=False):
|
97 |
prompt_output = gr.Textbox(label="Prompt", show_copy_button=True, lines=10, interactive=False)
|
98 |
|
99 |
gr.Markdown("---")
|
100 |
-
gr.Markdown("###
|
101 |
|
102 |
output_markdown = gr.Markdown(label="LLM Output", value="*Your results will appear here...*")
|
103 |
|
|
|
93 |
with gr.Column(scale=1):
|
94 |
submit_btn = gr.Button("Estimate Memory ☁", variant="primary", scale=1)
|
95 |
|
96 |
+
with gr.Accordion("💡 Tips", open=False):
|
97 |
+
gr.Markdown(
|
98 |
+
"""
|
99 |
+
- Try changing the model from Flash to Pro if the results are bad.
|
100 |
+
- Try to be as specific as possible about your local machine.
|
101 |
+
- As a rule of thumb, GPUs from the RTX 4090 and later are generally good for using `torch.compile()`.
|
102 |
+
- To leverage FP8, the GPU needs to have a compute capability of at least 8.9.
|
103 |
+
"""
|
104 |
+
)
|
105 |
+
|
106 |
with gr.Accordion("Generated LLM Prompt (for debugging)", open=False):
|
107 |
prompt_output = gr.Textbox(label="Prompt", show_copy_button=True, lines=10, interactive=False)
|
108 |
|
109 |
gr.Markdown("---")
|
110 |
+
gr.Markdown("### Generated Code")
|
111 |
|
112 |
output_markdown = gr.Markdown(label="LLM Output", value="*Your results will appear here...*")
|
113 |
|