Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ from utils import (
|
|
| 29 |
)
|
| 30 |
|
| 31 |
# Initialize the model and tokenizer.
|
| 32 |
-
api_token = os.getenv("
|
| 33 |
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 34 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
| 35 |
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
|
@@ -591,7 +591,7 @@ def update_token_breakdown(token_count, retrieval_slider, global_local_value):
|
|
| 591 |
|
| 592 |
rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
|
| 593 |
kv_tokens = retrieval_context_length - rag_tokens
|
| 594 |
-
return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)"
|
| 595 |
|
| 596 |
##########################################################################
|
| 597 |
# Gradio Interface
|
|
@@ -797,12 +797,12 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(font=["Arial", gr.themes.GoogleFont
|
|
| 797 |
retrieval_slider.change(
|
| 798 |
fn=update_token_breakdown,
|
| 799 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
| 800 |
-
outputs=tokens_breakdown_text
|
| 801 |
)
|
| 802 |
global_local_slider.change(
|
| 803 |
fn=update_token_breakdown,
|
| 804 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
| 805 |
-
outputs=tokens_breakdown_text
|
| 806 |
)
|
| 807 |
|
| 808 |
# Compress button: Prepare compression and then update chat status.
|
|
|
|
| 29 |
)
|
| 30 |
|
| 31 |
# Initialize the model and tokenizer.
|
| 32 |
+
api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
|
| 33 |
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 34 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
| 35 |
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
|
|
|
| 591 |
|
| 592 |
rag_tokens = int(retrieval_context_length * (1.0 - (percentage / 100)))
|
| 593 |
kv_tokens = retrieval_context_length - rag_tokens
|
| 594 |
+
return f"Token Breakdown: {kv_tokens} tokens (KV compression), {rag_tokens} tokens (RAG retrieval)", f"Number of tokens after compression: {retrieval_context_length}"
|
| 595 |
|
| 596 |
##########################################################################
|
| 597 |
# Gradio Interface
|
|
|
|
| 797 |
retrieval_slider.change(
|
| 798 |
fn=update_token_breakdown,
|
| 799 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
| 800 |
+
outputs=[tokens_breakdown_text, retrieval_info_text]
|
| 801 |
)
|
| 802 |
global_local_slider.change(
|
| 803 |
fn=update_token_breakdown,
|
| 804 |
inputs=[hidden_token_count, retrieval_slider, global_local_slider],
|
| 805 |
+
outputs=[tokens_breakdown_text, retrieval_info_text]
|
| 806 |
)
|
| 807 |
|
| 808 |
# Compress button: Prepare compression and then update chat status.
|