abdull4h committed on
Commit
d1f82c7
·
verified ·
1 Parent(s): fde320d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -6,6 +6,7 @@ import numpy as np
6
  import pandas as pd
7
  from tqdm import tqdm
8
  from pathlib import Path
 
9
 
10
  # PDF processing
11
  import PyPDF2
@@ -199,6 +200,7 @@ def create_vector_store(documents):
199
  return vector_store
200
 
201
  # Model Loading and RAG System
 
202
  def load_model_and_tokenizer():
203
  """Load the ALLaM-7B model and tokenizer with error handling"""
204
  model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
@@ -257,6 +259,7 @@ def retrieve_context(query, vector_store, top_k=5):
257
 
258
  return contexts
259
 
 
260
  def generate_response(query, contexts, model, tokenizer, language="auto"):
261
  """Generate a response using retrieved contexts with ALLaM-specific formatting"""
262
  # Auto-detect language if not specified
@@ -476,11 +479,13 @@ def evaluate_response(query, response, reference):
476
 
477
  return evaluation_results
478
 
 
479
  def run_conversation(assistant, query):
480
  """Run a query through the assistant and return the response"""
481
  response, sources, contexts = assistant.answer(query)
482
  return response, sources, contexts
483
 
 
484
  def run_evaluation_on_sample(assistant, sample_index=0):
485
  """Run evaluation on a selected sample from the evaluation dataset"""
486
  if sample_index < 0 or sample_index >= len(comprehensive_evaluation_data):
 
6
  import pandas as pd
7
  from tqdm import tqdm
8
  from pathlib import Path
9
+ import spaces # Import spaces for GPU allocation
10
 
11
  # PDF processing
12
  import PyPDF2
 
200
  return vector_store
201
 
202
  # Model Loading and RAG System
203
+ @spaces.GPU # Use GPU for model loading
204
  def load_model_and_tokenizer():
205
  """Load the ALLaM-7B model and tokenizer with error handling"""
206
  model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
 
259
 
260
  return contexts
261
 
262
+ @spaces.GPU # Use GPU for text generation
263
  def generate_response(query, contexts, model, tokenizer, language="auto"):
264
  """Generate a response using retrieved contexts with ALLaM-specific formatting"""
265
  # Auto-detect language if not specified
 
479
 
480
  return evaluation_results
481
 
482
+ @spaces.GPU # Use GPU for conversation handling
483
  def run_conversation(assistant, query):
484
  """Run a query through the assistant and return the response"""
485
  response, sources, contexts = assistant.answer(query)
486
  return response, sources, contexts
487
 
488
+ @spaces.GPU # Use GPU for evaluation
489
  def run_evaluation_on_sample(assistant, sample_index=0):
490
  """Run evaluation on a selected sample from the evaluation dataset"""
491
  if sample_index < 0 or sample_index >= len(comprehensive_evaluation_data):