Spaces:
Runtime error
Runtime error
File size: 5,841 Bytes
15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de 15bbe10 c9e00de |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import numpy as np
from models import chat_with_model, embed
from prompts import create_gen_prompt, create_judge_prompt
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import streamlit as st # Import Streamlit
def process_question(question, model_name, open_router_key, openai_api_key):
    """Collect answers to one question until they stop being coherent or novel.

    Each iteration builds a generation prompt from the question and the
    answers accepted so far, asks ``model_name`` for a new answer, asks the
    judge model for a coherence score, and computes a novelty score against
    the previously accepted answers. The loop stops when a model call
    fails, when the coherence score is 3 or lower, or when the novelty
    score falls below 0.1. Progress and errors are reported via ``st.write``.

    Args:
        question: The question text to benchmark.
        model_name: Model identifier passed to ``chat_with_model`` for
            answer generation.
        open_router_key: Key forwarded to ``chat_with_model``.
        openai_api_key: Key forwarded to ``chat_with_model`` and to
            ``get_novelty_score``.

    Returns:
        A tuple ``(question_novelty, results)`` where ``question_novelty`` is
        the sum of the novelty scores of all accepted answers and ``results``
        is a one-element list holding a dict with the keys ``"question"``,
        ``"answers"``, ``"coherence_score"`` (the last score parsed, or
        ``None`` if no judge response was ever parsed) and
        ``"novelty_score"`` (the same accumulated total).
    """
    start_time = time.time()
    st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True)  # Display question in red
    previous_answers = []
    question_novelty = 0
    # Bound up front: the loop can exit before any judge response is parsed
    # (e.g. the very first model call fails), and the return value below
    # reads this name unconditionally.
    coherence_score = None
    # The judge model is fixed for every iteration.
    judge = "openai/gpt-4o-mini"

    try:
        while True:
            gen_prompt = create_gen_prompt(question, previous_answers)
            try:
                new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key, openai_api_key=openai_api_key)
            except Exception as e:
                st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>", unsafe_allow_html=True)  # Display error in red
                break

            judge_prompt = create_judge_prompt(question, new_answer)
            try:
                judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key, openai_api_key=openai_api_key)
            except Exception as e:
                st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>", unsafe_allow_html=True)  # Display error in red
                break

            # The score is read from between the <coherence_score> tags; a
            # missing tag or non-integer content raises and is reported by
            # the outer handler below.
            coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])

            if coherence_score <= 3:
                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>", unsafe_allow_html=True)  # Display warning in yellow
                break

            novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)

            if novelty_score < 0.1:
                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>", unsafe_allow_html=True)  # Display warning in yellow
                break

            st.write(f"**New Answer:**\n{new_answer}")
            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>", unsafe_allow_html=True)  # Display coherence score in green
            st.write(f"**Novelty Score:** {novelty_score}")

            # Only answers that passed both checks are kept and counted.
            previous_answers.append(new_answer)
            question_novelty += novelty_score

    except Exception as e:
        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>", unsafe_allow_html=True)  # Display error in red

    time_taken = time.time() - start_time
    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>", unsafe_allow_html=True)  # Display novelty score in blue
    st.write(f"<span style='color:blue'>Time taken: {time_taken} seconds</span>", unsafe_allow_html=True)  # Display time taken in blue

    return question_novelty, [
        {
            "question": question,
            "answers": previous_answers,
            "coherence_score": coherence_score,
            "novelty_score": question_novelty,
        }
    ]
def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key):
    """Score how different ``new_answer`` is from every previous answer.

    The score is ``1 - max_similarity``, where ``max_similarity`` is the
    highest cosine similarity between the embedding of ``new_answer`` and
    the embedding of each entry in ``previous_answers``.

    Args:
        new_answer: The candidate answer text.
        previous_answers: Answers already accepted for the same question.
        openai_api_key: Key forwarded to ``embed``.

    Returns:
        ``1.0`` when ``previous_answers`` is empty, otherwise one minus the
        largest cosine similarity found.
    """
    # If there are no previous answers, return maximum novelty. This check
    # comes first so no embedding is requested when its result is unused.
    if not previous_answers:
        return 1.0

    new_embedding = embed(new_answer, openai_api_key)
    # The norm of the new embedding is the same for every comparison.
    new_norm = np.linalg.norm(new_embedding)

    previous_embeddings = [embed(answer, openai_api_key) for answer in previous_answers]

    similarities = [
        np.dot(new_embedding, prev_embedding) / (new_norm * np.linalg.norm(prev_embedding))
        for prev_embedding in previous_embeddings
    ]

    return 1 - max(similarities)
def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key):
    """Run ``process_question`` for every question concurrently.

    One task per question is submitted to a thread pool. As tasks finish,
    their novelty scores are summed and their result entries collected; a
    running total is reported via ``st.write`` after each one. A task that
    raises is reported and skipped, so it does not abort the remaining
    questions.

    Args:
        model_name: Model identifier forwarded to ``process_question``.
        questions: Sized collection of question strings (``len`` is used to
            size the pool).
        open_router_key: Key forwarded to ``process_question``.
        openai_api_key: Key forwarded to ``process_question``.

    Returns:
        A list with the result entries of every question that completed
        without raising, in completion order. Empty when ``questions`` is
        empty.
    """
    # NOTE(review): process_question calls st.write from the worker threads;
    # confirm Streamlit renders output from threads in this deployment.
    novelty_score = 0
    results = []

    # ThreadPoolExecutor raises ValueError for max_workers <= 0, so keep at
    # least one worker when there are no questions.
    worker_count = max(1, len(questions))

    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        futures = [
            executor.submit(process_question, question, model_name, open_router_key, openai_api_key)
            for question in questions
        ]

        # This loop runs only in the calling thread, so the accumulators
        # need no lock.
        for future in as_completed(futures):
            try:
                question_novelty, question_results = future.result()
                novelty_score += question_novelty
                results.extend(question_results)
                st.write(f"<span style='color:yellow'>Total novelty score across all questions (so far): {novelty_score}</span>", unsafe_allow_html=True)
            except Exception as e:
                st.write(f"<span style='color:red'>Error in thread: {str(e)}</span>", unsafe_allow_html=True)

    st.write(f"<span style='color:yellow'>Final total novelty score across all questions: {novelty_score}</span>", unsafe_allow_html=True)
    return results
def benchmark_model_sequential(model_name, questions, open_router_key, openai_api_key):
    """Run ``process_question`` for each question, one after another.

    After every question the accumulated novelty total is reported via
    ``st.write``; a final total is reported once all questions are done.

    Args:
        model_name: Model identifier forwarded to ``process_question``.
        questions: Iterable of question strings, processed in order.
        open_router_key: Key forwarded to ``process_question``.
        openai_api_key: Key forwarded to ``process_question``.

    Returns:
        A list with the result entries of every question, in input order.
    """
    running_total = 0
    collected = []

    for current_question in questions:
        outcome = process_question(current_question, model_name, open_router_key, openai_api_key)
        score, entries = outcome
        running_total += score
        collected.extend(entries)
        # Display progress after each question
        st.write(
            f"<span style='color:yellow'>Total novelty score across processed questions: {running_total}</span>",
            unsafe_allow_html=True,
        )

    st.write(
        f"<span style='color:yellow'>Final total novelty score across all questions: {running_total}</span>",
        unsafe_allow_html=True,
    )
    return collected
|