Commit · 8151596
Parent(s): 2d7d23d

switched models to using full context

utils/models.py: +4 -12
utils/models.py CHANGED

@@ -30,18 +30,13 @@ def generate_summaries(example, model_a_name, model_b_name):
     # Create a plain text version of the contexts for the models
     context_text = ""
     context_parts = []
-    if "contexts" in example:
-        for ctx in example["contexts"]:
+    if "full_contexts" in example:
+        for ctx in example["full_contexts"]:
             if isinstance(ctx, dict) and "content" in ctx:
                 context_parts.append(ctx["content"])
         context_text = "\n---\n".join(context_parts)
     else:
-
-        if "full_contexts" in example:
-            for ctx in example["full_contexts"]:
-                if isinstance(ctx, dict) and "content" in ctx:
-                    context_parts.append(ctx["content"])
-            context_text = "\n---\n".join(context_parts)
+        raise ValueError("No context found in the example.")
 
     # Pass 'Answerable' status to models (they might use it)
     answerable = example.get("Answerable", True)

@@ -85,17 +80,14 @@ def run_inference(model_name, context, question):
     ).to(device)
 
     input_length = actual_input.shape[1]
-
-    # Create attention mask (1 for all tokens since we're not padding)
     attention_mask = torch.ones_like(actual_input).to(device)
 
     # Generate output
     with torch.inference_mode():
-        # Disable gradient calculation for inference
         outputs = model.generate(
             actual_input,
             attention_mask=attention_mask,
-            max_new_tokens=512,
+            max_new_tokens=512,
             pad_token_id=tokenizer.pad_token_id,
         )
 
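In effect, the commit makes full_contexts the only accepted context source and fails fast when it is missing. A minimal sketch of the new behavior, assuming the key shapes the diff relies on; build_context_text is a hypothetical standalone extraction of the logic inside generate_summaries, not a function in the repo, and the sample dict is illustrative:

# Hypothetical standalone version of the context assembly after this commit.
def build_context_text(example: dict) -> str:
    context_parts = []
    if "full_contexts" in example:
        for ctx in example["full_contexts"]:
            if isinstance(ctx, dict) and "content" in ctx:
                context_parts.append(ctx["content"])
        return "\n---\n".join(context_parts)
    else:
        raise ValueError("No context found in the example.")

# Illustrative example dict with the keys the diff expects.
example = {
    "full_contexts": [
        {"content": "Passage one."},
        {"content": "Passage two."},
    ],
    "Answerable": True,
}
print(build_context_text(example))  # Passage one. / --- / Passage two.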
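For reference, the trimmed generation path in run_inference reduces to the following minimal runnable sketch. The gpt2 checkpoint, the prompt, and the pad-token fallback are placeholders and assumptions, not part of the repo; the final decode step assumes input_length is recorded in order to strip the prompt from the output, which the diff itself does not show.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
tokenizer.pad_token = tokenizer.eos_token          # assumption: GPT-2 ships without a pad token
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

actual_input = tokenizer("Summarize: ...", return_tensors="pt").input_ids.to(device)
input_length = actual_input.shape[1]
# A mask of ones is valid here because this is a single, unpadded sequence.
attention_mask = torch.ones_like(actual_input).to(device)

with torch.inference_mode():  # inference_mode already disables gradient tracking
    outputs = model.generate(
        actual_input,
        attention_mask=attention_mask,
        max_new_tokens=512,
        pad_token_id=tokenizer.pad_token_id,
    )

# Slice off the prompt so only newly generated tokens are decoded.
summary = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
print(summary)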