forrestbao committed
Commit 81cb431 · 1 Parent(s): 120684a

fix user permission problem

Files changed (5):
  1. Dockerfile +6 -10
  2. app/app.py +0 -3
  3. app/app_utils.py +5 -8
  4. app/requirements.txt +0 -1
  5. app/results.json +0 -860
Dockerfile CHANGED
@@ -6,21 +6,17 @@ COPY ./app/vectara_theme.py /app/vectara_theme.py
 COPY ./app/requirements.txt /app/requirements.txt
 COPY ./app/app.py /app/app.py
 COPY ./app/app_utils.py /app/app_utils.py
-COPY ./app/results.json /app/results.json
+# COPY ./app/results.json /app/results.json
 
 RUN apt-get update && apt-get install -y git-lfs
 
-RUN mkdir -p /app/results
-
 RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
 
-# RUN useradd -m -u 1000 user
-# USER user
-# ENV HOME=/home/user \
-#     PATH=/home/user/.local/bin:$PATH
+RUN useradd -m -u 1000 user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
 
-# WORKDIR $HOME/app
-
-# COPY --chown=user . $HOME/app
+RUN mkdir -p /app/results
+RUN chown -R user /app/results
 
 CMD ["funix", "app.py", "--host", "0.0.0.0", "--port", "7860", "--no-browser"]
app/app.py CHANGED
@@ -5,9 +5,6 @@ import pandas as pd
 import matplotlib.figure
 from IPython.display import Markdown
 
-import dotenv
-dotenv.load_dotenv() # load HF_TOKEN
-
 from funix import funix, import_theme
 from vectara_theme import vectara_theme
 import_theme(vectara_theme)
app/app_utils.py CHANGED
@@ -7,14 +7,14 @@ import matplotlib.pyplot as plt
 import matplotlib.figure
 from sklearn.preprocessing import MinMaxScaler
 
-import dotenv
-dotenv.load_dotenv()
+# import dotenv
+# dotenv.load_dotenv()
 
 min_max_scaler = MinMaxScaler()
 
 # %%
 def pull_results(results_dir: str):
-    repo = Repository(local_dir = results_dir, clone_from="vectara/results", repo_type="dataset", token=os.getenv("HF_TOKEN"))
+    repo = Repository(local_dir = results_dir, clone_from="vectara/results", repo_type="dataset")
     repo.git_pull()
 
 def extract_info_from_result_file(result_file):
@@ -108,14 +108,11 @@ def load_results(
     results_df = pd.DataFrame(results)
     results_df = results_df.sort_values(by="Hallucination %", ascending=True)
 
-    # replace any value TBD with 0
-    results_df = results_df.replace("TBD", -1)
+    # replace any value TBD with -1
+    results_df = results_df.replace("TBD", 100)
 
     for column in ["Hallucination %", "Answer %", "Avg Summary Words"]:
        results_df[column] = results_df[column].apply(lambda x: round(x, 3))
-
-    # replace any value -1 with string "TBD"
-    results_df = results_df.replace(-1, "TBD")
 
     return results_df
 
app/requirements.txt CHANGED
@@ -1,6 +1,5 @@
 funix==0.6.1
 pandas
-dotenv
 huggingface_hub
 matplotlib
 scikit-learn
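With dotenv dropped from requirements.txt and the token argument removed from pull_results(), the clone presumably relies on vectara/results being publicly readable, which lines up with the HF_TOKEN loading being deleted from app.py as well. A minimal usage sketch under that assumption, tying the call to the /app/results directory prepared in the Dockerfile; the __main__ wrapper is illustrative, not part of the app.

# Sketch of the token-less pull path (assumes vectara/results is a public dataset repo).
from huggingface_hub import Repository

def pull_results(results_dir: str) -> None:
    # Clones into results_dir on the first call; afterwards the existing
    # checkout is reused and git_pull() fast-forwards it.
    repo = Repository(local_dir=results_dir,
                      clone_from="vectara/results",
                      repo_type="dataset")
    repo.git_pull()

if __name__ == "__main__":
    pull_results("/app/results")  # directory chown'ed to `user` in the Dockerfile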
app/results.json DELETED
@@ -1,860 +0,0 @@
-[
-  {"LLM": "gemini-2.0-flash-exp", "Hallucination %": 1.3, "Answer %": 99.9, "Avg Summary Words": 60.0},
-  {"LLM": "deepseek/deepseek-r1", "Hallucination %": 14.3, "Answer %": 100.0, "Avg Summary Words": 77.1},
-  {"LLM": "deepseek/deepseek-v3", "Hallucination %": 3.9, "Answer %": 100.0, "Avg Summary Words": 88.2},
-  {"LLM": "deepseek/deepseek-chat", "Hallucination %": 2.4, "Answer %": 100.0, "Avg Summary Words": 83.2},
-  {"LLM": "deepseek/deepseek-v3-0324", "Hallucination %": 8.0, "Answer %": 100.0, "Avg Summary Words": 78.9},
-  {"LLM": "openai/chatgpt-4o-latest", "Hallucination %": 3.5, "Answer %": 100.0, "Avg Summary Words": 63.5},
-  {"LLM": "openai/GPT-4", "Hallucination %": 1.8050541516245486, "Answer %": 100.0, "Avg Summary Words": 81.1},
-  {"LLM": "openai/o3-mini-high-reasoning", "Hallucination %": 0.7952286282306176, "Answer %": 100.0, "Avg Summary Words": 79.51888667992047},
-  {"LLM": "openai/gpt-4.1-mini", "Hallucination %": 2.2, "Answer %": 100.0, "Avg Summary Words": 79.6},
-  {"LLM": "openai/o1-pro", "Hallucination %": 2.4, "Answer %": 100.0, "Avg Summary Words": 81.0},
-  {"LLM": "openai/gpt-4.1-nano", "Hallucination %": 2.0, "Answer %": 100.0, "Avg Summary Words": 70.2},
-  {"LLM": "openai/o1-mini", "Hallucination %": 1.4, "Answer %": 100.0, "Avg Summary Words": 78.3},
-  {"LLM": "openai/GPT-4-Turbo", "Hallucination %": 1.6898608349900597, "Answer %": 100.0, "Avg Summary Words": 86.2},
-  {"LLM": "openai/o3", "Hallucination %": 6.8, "Answer %": 100.0, "Avg Summary Words": 77.7},
-  {"LLM": "openai/GPT-3.5-Turbo", "Hallucination %": 1.9, "Answer %": 99.6, "Avg Summary Words": 84.1},
-  {"LLM": "openai/o1", "Hallucination %": 2.4, "Answer %": 99.9, "Avg Summary Words": 73.0},
-  {"LLM": "openai/GPT-4o", "Hallucination %": 1.4910536779324055, "Answer %": 100.0, "Avg Summary Words": 77.8},
-  {"LLM": "openai/GPT-4o-mini", "Hallucination %": 1.7, "Answer %": 100.0, "Avg Summary Words": 76.3},
-  {"LLM": "openai/o1-preview", "Hallucination %": 3.3, "Answer %": 100.0, "Avg Summary Words": 119.3},
-  {"LLM": "openai/o4-mini", "Hallucination %": 4.6, "Answer %": 100.0, "Avg Summary Words": 82.0},
-  {"LLM": "openai/gpt-4.5-preview", "Hallucination %": 1.2, "Answer %": 100.0, "Avg Summary Words": 77.0},
-  {"LLM": "openai/gpt-4.1", "Hallucination %": 2.0, "Answer %": 100.0, "Avg Summary Words": 71.9},
-  {"LLM": "Qwen/Qwen2-VL-2B-Instruct", "Hallucination %": 8.3, "Answer %": 100.0, "Avg Summary Words": 81.8},
-  {"LLM": "Qwen/Qwen2.5-14B-Instruct", "Hallucination %": 4.2, "Answer %": 100.0, "Avg Summary Words": 74.8},
-  {"LLM": "Qwen/Qwen3-32B", "Hallucination %": 2.8, "Answer %": 100.0, "Avg Summary Words": 82.4},
-  {"LLM": "Qwen/Qwen2.5-32B-Instruct", "Hallucination %": 3.0, "Answer %": 100.0, "Avg Summary Words": 67.9},
-  {"LLM": "Qwen/QwQ-32B-Preview", "Hallucination %": 12.9, "Answer %": 100.0, "Avg Summary Words": 140.2},
-  {"LLM": "Qwen/Qwen3-0.6B", "Hallucination %": 3.7, "Answer %": 100.0, "Avg Summary Words": 65.3},
-  {"LLM": "Qwen/Qwen3-14B", "Hallucination %": 2.2, "Answer %": 100.0, "Avg Summary Words": 82.4},
-  {"LLM": "Qwen/Qwen2.5-3B-Instruct", "Hallucination %": 7.0, "Answer %": 100.0, "Avg Summary Words": 70.4},
-  {"LLM": "Qwen/Qwen2.5-1.5B-Instruct", "Hallucination %": 15.8, "Answer %": 100.0, "Avg Summary Words": 70.7},
-  {"LLM": "Qwen/Qwen2-VL-7B-Instruct", "Hallucination %": 4.2, "Answer %": 100.0, "Avg Summary Words": 73.9},
-  {"LLM": "Qwen/Qwen2.5-0.5B-Instruct", "Hallucination %": 25.2, "Answer %": 100.0, "Avg Summary Words": 72.6},
-  {"LLM": "Qwen/Qwen3-4B", "Hallucination %": 2.7, "Answer %": 100.0, "Avg Summary Words": 87.7},
-  {"LLM": "Qwen/Qwen2.5-72B-Instruct", "Hallucination %": 4.3, "Answer %": 100.0, "Avg Summary Words": 80.8},
-  {"LLM": "Qwen/Qwen3-8B", "Hallucination %": 3.0, "Answer %": 100.0, "Avg Summary Words": 78.2},
-  {"LLM": "Qwen/Qwen3-1.7B", "Hallucination %": 4.4, "Answer %": 100.0, "Avg Summary Words": 69.0},
-  {"LLM": "Qwen/Qwen2-72B-Instruct", "Hallucination %": 4.7, "Answer %": 100.0, "Avg Summary Words": 100.1},
-  {"LLM": "Qwen/Qwen2.5-7B-Instruct", "Hallucination %": 2.8, "Answer %": 100.0, "Avg Summary Words": 71.0},
-  {"LLM": "allenai/OLMo-2-1124-7B-Instruct", "Hallucination %": 11.1, "Answer %": 100.0, "Avg Summary Words": 112.6},
-  {"LLM": "allenai/OLMo-2-1124-13B-Instruct", "Hallucination %": 10.8, "Answer %": 100.0, "Avg Summary Words": 82.0},
-  {"LLM": "allenai/olmo-2-0325-32b-instruct", "Hallucination %": 4.9, "Answer %": 99.9, "Avg Summary Words": 100.0},
-  {"LLM": "amazon/Titan-Express", "Hallucination %": 13.5, "Answer %": 99.5, "Avg Summary Words": 98.4},
-  {"LLM": "amazon/nova-lite-v1", "Hallucination %": 1.8, "Answer %": 99.9, "Avg Summary Words": 80.7},
-  {"LLM": "amazon/nova-pro-v1", "Hallucination %": 1.8, "Answer %": 100.0, "Avg Summary Words": 85.5},
-  {"LLM": "amazon/nova-micro-v1", "Hallucination %": 1.6, "Answer %": 100.0, "Avg Summary Words": 90.0},
-  {"LLM": "google/gemini-2.5-pro-exp-03-25", "Hallucination %": 1.1, "Answer %": 95.1, "Avg Summary Words": 72.9},
-  {"LLM": "google/PaLM-2", "Hallucination %": 14.1, "Answer %": 99.8, "Avg Summary Words": 86.6},
-  {"LLM": "google/gemma-1.1-2b-it", "Hallucination %": 27.8, "Answer %": 100.0, "Avg Summary Words": 66.8},
-  {"LLM": "google/gemini-2.0-flash-thinking-exp", "Hallucination %": 1.8, "Answer %": 99.3, "Avg Summary Words": 73.2},
-  {"LLM": "google/gemma-3-1b-it", "Hallucination %": 5.3, "Answer %": 99.9, "Avg Summary Words": 57.9},
-  {"LLM": "google/gemma-2-2b-it", "Hallucination %": 7.0, "Answer %": 100.0, "Avg Summary Words": 62.2},
-  {"LLM": "google/flan-t5-large", "Hallucination %": 18.3, "Answer %": 99.3, "Avg Summary Words": 20.9},
-  {"LLM": "google/gemini-2.5-flash-preview-04-17", "Hallucination %": 1.3, "Answer %": 91.2, "Avg Summary Words": 71.1},
-  {"LLM": "google/Gemini-Pro", "Hallucination %": 7.6767676767676765, "Answer %": 98.4, "Avg Summary Words": 89.5},
-  {"LLM": "google/gemini-1.5-pro-001", "Hallucination %": 9.1, "Answer %": 99.8, "Avg Summary Words": 61.6},
-  {"LLM": "google/gemma-2-9b-it", "Hallucination %": 10.139165009940358, "Answer %": 100.0, "Avg Summary Words": 70.2},
-  {"LLM": "google/gemma-1.1-7b-it", "Hallucination %": 17.0, "Answer %": 100.0, "Avg Summary Words": 64.3},
-  {"LLM": "google/gemma-3-4b-it", "Hallucination %": 3.7, "Answer %": 100.0, "Avg Summary Words": 63.7},
-  {"LLM": "google/gemini-2.0-pro-exp-02-05", "Hallucination %": 0.8, "Answer %": 99.7, "Avg Summary Words": 61.5},
-  {"LLM": "google/gemini-1.5-pro-002", "Hallucination %": 6.6, "Answer %": 99.9, "Avg Summary Words": 62.0},
-  {"LLM": "google/gemma-3-12b-it", "Hallucination %": 2.8, "Answer %": 100.0, "Avg Summary Words": 69.6},
-  {"LLM": "google/gemini-2.0-flash-001", "Hallucination %": 0.7, "Answer %": 100.0, "Avg Summary Words": 65.2},
-  {"LLM": "google/gemini-1.5-flash-002", "Hallucination %": 3.4, "Answer %": 99.9, "Avg Summary Words": 59.4},
-  {"LLM": "google/gemma-7b-it", "Hallucination %": 14.81113320079523, "Answer %": 100.0, "Avg Summary Words": 113.0},
-  {"LLM": "google/gemini-2.0-flash-lite-preview-02-05", "Hallucination %": 1.2, "Answer %": 99.5, "Avg Summary Words": 60.9},
-  {"LLM": "google/gemini-1.5-flash-001", "Hallucination %": 6.6, "Answer %": 99.9, "Avg Summary Words": 63.3},
-  {"LLM": "google/gemma-3-27b-it", "Hallucination %": 5.9, "Answer %": 98.5, "Avg Summary Words": 64.3},
-  {"LLM": "snowflake/snowflake-arctic-instruct", "Hallucination %": 3.0, "Answer %": 100.0, "Avg Summary Words": 68.7},
-  {"LLM": "01-ai/Yi-1.5-9B-Chat", "Hallucination %": 4.9, "Answer %": 100.0, "Avg Summary Words": 85.7},
-  {"LLM": "01-ai/Yi-1.5-6B-Chat", "Hallucination %": 7.9, "Answer %": 100.0, "Avg Summary Words": 98.9},
-  {"LLM": "01-ai/Yi-1.5-34B-Chat", "Hallucination %": 3.7, "Answer %": 100.0, "Avg Summary Words": 83.7},
-  {"LLM": "ai21labs/AI21-Jamba-1.5-Mini", "Hallucination %": 2.9, "Answer %": 95.6, "Avg Summary Words": 74.5},
-  {"LLM": "cohere/c4ai-aya-expanse-32b", "Hallucination %": 8.5, "Answer %": 99.9, "Avg Summary Words": 81.9},
-  {"LLM": "cohere/command-r-plus-08-2024", "Hallucination %": 5.4, "Answer %": 100.0, "Avg Summary Words": 68.4},
-  {"LLM": "cohere/c4ai-aya-expanse-8b", "Hallucination %": 12.2, "Answer %": 99.9, "Avg Summary Words": 83.9},
-  {"LLM": "cohere/command-a-03-2025", "Hallucination %": 4.5, "Answer %": 100.0, "Avg Summary Words": 77.3},
-  {"LLM": "cohere/command-r-08-2024", "Hallucination %": 4.9, "Answer %": 100.0, "Avg Summary Words": 68.7},
-  {"LLM": "Intel/neural-chat-7b-v3-3", "Hallucination %": 2.6, "Answer %": 100.0, "Avg Summary Words": 60.7},
-  {"LLM": "mistralai/pixtral-large-latest", "Hallucination %": 6.6, "Answer %": 100.0, "Avg Summary Words": 76.4},
-  {"LLM": "mistralai/Mixtral-8x22B-Instruct-v0.1", "Hallucination %": 4.7, "Answer %": 99.9, "Avg Summary Words": 92.0},
-  {"LLM": "mistralai/mistral-small-latest", "Hallucination %": 8.6, "Answer %": 100.0, "Avg Summary Words": 74.2},
-  {"LLM": "mistralai/mistral-large-latest", "Hallucination %": 5.864811133200803, "Answer %": 100.0, "Avg Summary Words": 79.55367793240556},
-  {"LLM": "mistralai/Mixtral-8x7B-Instruct-v0.1", "Hallucination %": 20.09950248756219, "Answer %": 99.9, "Avg Summary Words": 90.7},
-  {"LLM": "mistralai/Mistral-Nemo-Instruct-2407", "Hallucination %": 11.2, "Answer %": 100.0, "Avg Summary Words": 69.9},
-  {"LLM": "mistralai/Mistral-Large2", "Hallucination %": 4.1, "Answer %": 100.0, "Avg Summary Words": 77.4},
-  {"LLM": "mistralai/Mistral-7B-Instruct-v0.3", "Hallucination %": 9.5, "Answer %": 100.0, "Avg Summary Words": 98.4},
-  {"LLM": "mistralai/ministral-3b-latest", "Hallucination %": 8.3, "Answer %": 100.0, "Avg Summary Words": 73.2},
-  {"LLM": "mistralai/ministral-8b-latest", "Hallucination %": 7.5, "Answer %": 100.0, "Avg Summary Words": 62.7},
-  {"LLM": "mistralai/Mistral-Small-24B-Instruct-2501", "Hallucination %": 3.1, "Answer %": 100.0, "Avg Summary Words": 74.9},
-  {"LLM": "mistralai/mistral-small-3.1-24b-instruct", "Hallucination %": 5.6, "Answer %": 100.0, "Avg Summary Words": 73.1},
-  {"LLM": "anthropic/Claude-3-5-Sonnet", "Hallucination %": 8.6, "Answer %": 100.0, "Avg Summary Words": 103.0},
-  {"LLM": "anthropic/claude-3-7-sonnet-latest", "Hallucination %": 4.4, "Answer %": 100.0, "Avg Summary Words": 97.8},
-  {"LLM": "anthropic/Claude-3-opus", "Hallucination %": 10.092687950566425, "Answer %": 95.5, "Avg Summary Words": 92.1},
-  {"LLM": "anthropic/Claude-2", "Hallucination %": 17.448856799037305, "Answer %": 99.3, "Avg Summary Words": 87.5},
-  {"LLM": "anthropic/claude-3-5-haiku-20241022", "Hallucination %": 4.9, "Answer %": 100.0, "Avg Summary Words": 92.2},
-  {"LLM": "anthropic/Claude-3-sonnet", "Hallucination %": 16.302186878727635, "Answer %": 100.0, "Avg Summary Words": 108.5},
-  {"LLM": "anthropic/claude-3-7-sonnet-latest-think", "Hallucination %": 4.5, "Answer %": 99.8, "Avg Summary Words": 99.9},
-  {"LLM": "ai21/jamba-1.6-mini", "Hallucination %": 4.6, "Answer %": 100.0, "Avg Summary Words": 82.3},
-  {"LLM": "ai21/jamba-1.6-large", "Hallucination %": 2.3, "Answer %": 99.9, "Avg Summary Words": 85.6},
-  {"LLM": "qwen/qwen3-235b-a22b", "Hallucination %": 13.0, "Answer %": 99.2, "Avg Summary Words": 86.6},
-  {"LLM": "qwen/qwen-max", "Hallucination %": 2.9, "Answer %": 88.4, "Avg Summary Words": 90.4},
-  {"LLM": "qwen/qwen3-30b-a3b", "Hallucination %": 7.6, "Answer %": 99.9, "Avg Summary Words": 69.9},
-  {"LLM": "x-ai/grok-2-1212", "Hallucination %": 1.9, "Answer %": 100.0, "Avg Summary Words": 86.5},
-  {"LLM": "x-ai/grok-2-vision-1212", "Hallucination %": 2.9, "Answer %": 100.0, "Avg Summary Words": 79.8},
-  {"LLM": "databricks/dbrx-instruct", "Hallucination %": 8.3, "Answer %": 100.0, "Avg Summary Words": 85.9},
-  {"LLM": "xai/grok-3-mini-latest", "Hallucination %": 3.3, "Answer %": 100.0, "Avg Summary Words": 90.2},
-  {"LLM": "xai/grok-beta", "Hallucination %": 4.6, "Answer %": 100.0, "Avg Summary Words": 91.0},
-  {"LLM": "xai/grok-3-latest", "Hallucination %": 2.1, "Answer %": 100.0, "Avg Summary Words": 97.7},
-  {"LLM": "apple/OpenELM-3B-Instruct", "Hallucination %": 24.776119402985074, "Answer %": 99.3, "Avg Summary Words": 47.2},
-  {"LLM": "meta-llama/Llama-3.2-3B-Instruct-Turbo", "Hallucination %": 7.9, "Answer %": 100.0, "Avg Summary Words": 72.2},
-  {"LLM": "meta-llama/Llama-2-70b-chat-hf", "Hallucination %": 5.896510228640193, "Answer %": 99.9, "Avg Summary Words": 84.9},
-  {"LLM": "meta-llama/Meta-Llama-3.1-405B-Instruct", "Hallucination %": 3.9, "Answer %": 99.6, "Avg Summary Words": 85.7},
-  {"LLM": "meta-llama/Llama-3.3-70B-Instruct", "Hallucination %": 4.0, "Answer %": 100.0, "Avg Summary Words": 85.3},
-  {"LLM": "meta-llama/Meta-Llama-3.1-8B-Instruct", "Hallucination %": 5.4, "Answer %": 100.0, "Avg Summary Words": 71.0},
-  {"LLM": "meta-llama/Meta-Llama-3.1-70B-Instruct", "Hallucination %": 5.0, "Answer %": 100.0, "Avg Summary Words": 79.6},
-  {"LLM": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", "Hallucination %": 8.9, "Answer %": 100.0, "Avg Summary Words": 73.1},
-  {"LLM": "meta-llama/Llama-3.2-1B-Instruct", "Hallucination %": 20.7, "Answer %": 100.0, "Avg Summary Words": 71.5},
-  {"LLM": "meta-llama/Llama-3-70B-chat-hf", "Hallucination %": 4.1, "Answer %": 99.2, "Avg Summary Words": 68.5},
-  {"LLM": "meta-llama/Llama-3-8B-chat-hf", "Hallucination %": 7.370517928286853, "Answer %": 99.8, "Avg Summary Words": 79.7},
-  {"LLM": "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", "Hallucination %": 4.3, "Answer %": 100.0, "Avg Summary Words": 79.8},
-  {"LLM": "meta-llama/llama-4-scout", "Hallucination %": 4.7, "Answer %": 100.0, "Avg Summary Words": 80.7},
-  {"LLM": "meta-llama/Llama-2-7b-chat-hf", "Hallucination %": 11.3, "Answer %": 99.6, "Avg Summary Words": 119.9},
-  {"LLM": "meta-llama/Llama-2-13b-chat-hf", "Hallucination %": 10.5, "Answer %": 99.8, "Avg Summary Words": 82.1},
-  {"LLM": "meta-llama/llama-4-maverick", "Hallucination %": 4.6, "Answer %": 100.0, "Avg Summary Words": 84.8},
-  {"LLM": "microsoft/Orca-2-13b", "Hallucination %": 2.5, "Answer %": 100.0, "Avg Summary Words": 66.2},
-  {"LLM": "microsoft/Phi-3.5-MoE-instruct", "Hallucination %": 2.5, "Answer %": 96.3, "Avg Summary Words": 69.7},
-  {"LLM": "microsoft/Phi-3-mini-4k-instruct", "Hallucination %": 3.9761431411530817, "Answer %": 100.0, "Avg Summary Words": 86.8},
-  {"LLM": "microsoft/phi-4", "Hallucination %": 4.7, "Answer %": 100.0, "Avg Summary Words": 100.3},
-  {"LLM": "microsoft/Phi-3.5-mini-instruct", "Hallucination %": 4.1, "Answer %": 100.0, "Avg Summary Words": 75.0},
-  {"LLM": "microsoft/Phi-3-mini-128k-instruct", "Hallucination %": 3.1, "Answer %": 100.0, "Avg Summary Words": 60.1},
-  {"LLM": "microsoft/Phi-4-mini-instruct", "Hallucination %": 3.4, "Answer %": 100.0, "Avg Summary Words": 69.7},
-  {"LLM": "microsoft/WizardLM-2-8x22B", "Hallucination %": 11.741293532338307, "Answer %": 99.9, "Avg Summary Words": 140.8},
-  {"LLM": "microsoft/phi-2", "Hallucination %": 6.666666666666667, "Answer %": 91.5, "Avg Summary Words": 80.8},
-  {"LLM": "THUDM/glm-4-9b-chat", "Hallucination %": 1.3, "Answer %": 100.0, "Avg Summary Words": 58.1},
-  {"LLM": "internlm/internlm3-8b-instruct", "Hallucination %": 4.0, "Answer %": 100.0, "Avg Summary Words": 97.5},
-  {"LLM": "ibm-granite/granite-3.1-8b-instruct", "Hallucination %": 8.6, "Answer %": 100.0, "Avg Summary Words": 107.4},
-  {"LLM": "ibm-granite/granite-3.2-2b-instruct", "Hallucination %": 16.5, "Answer %": 100.0, "Avg Summary Words": 117.3},
-  {"LLM": "ibm-granite/granite-3.1-2b-instruct", "Hallucination %": 15.7, "Answer %": 100.0, "Avg Summary Words": 107.7},
-  {"LLM": "ibm-granite/granite-3.0-2b-instruct", "Hallucination %": 8.8, "Answer %": 100.0, "Avg Summary Words": 81.6},
-  {"LLM": "ibm-granite/granite-3.0-8b-instruct", "Hallucination %": 6.5, "Answer %": 100.0, "Avg Summary Words": 74.2},
-  {"LLM": "ibm-granite/granite-3.2-8b-instruct", "Hallucination %": 8.7, "Answer %": 100.0, "Avg Summary Words": 120.1},
-  {"LLM": "tiiuae/falcon-7b-instruct", "Hallucination %": 29.92047713717694, "Answer %": 90.0, "Avg Summary Words": 75.5}
-]