Spaces:
Sleeping
Sleeping
File size: 5,918 Bytes
ec9caf7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import re
import streamlit as st
from together import Together
# --- Page setup ----------------------------------------------------------
# st.set_page_config must be the first Streamlit command executed.
# NOTE: the original page_icon was mojibake ("π§ ") — restored to the 🧠 emoji.
st.set_page_config(page_title="AI Prompt Evaluator", page_icon="🧠", layout="wide")
st.title("AI Prompt Evaluator")
def evaluate_prompt(prompt):
    """Score a prompt against five prompt-engineering heuristics.

    Each criterion met contributes 2 points, giving a 0-10 scale.

    Args:
        prompt: The user-supplied prompt text.

    Returns:
        Tuple ``(score, suggestions, criteria)`` where ``score`` is an int
        in 0-10, ``suggestions`` is a list of tips for the failed criteria
        (in criterion order), and ``criteria`` maps criterion name -> bool.
    """
    lowered = prompt.lower()  # lowercase once instead of once per criterion
    criteria = {
        # Question words suggest the request states what is wanted.
        "clarity": bool(re.search(r"\b(who|what|where|when|why|how)\b", lowered)),
        # More than five words is a rough proxy for detail.
        "specificity": len(prompt.split()) > 5,
        # Example/background markers indicate context was provided.
        "context": bool(re.search(r"\b(for example|such as|like|including)\b", lowered)),
        # Mentioning a structure (list, table, ...) counts as a format spec.
        "output_format": bool(re.search(r"\b(list|table|bullets|code|summary)\b", lowered)),
        # Limit-related words count as explicit constraints.
        "constraints": bool(re.search(r"\b(limit|max|min|exactly|within)\b", lowered)),
    }
    score = sum(criteria.values()) * 2  # 5 criteria x 2 points -> 10-point scale

    # One actionable tip per criterion; emitted only for failed criteria.
    tips = {
        "clarity": "Make the prompt clearer by specifying exactly what you need.",
        "specificity": "Make the prompt more detailed and specific.",
        "context": "Add some background information to improve relevance.",
        "output_format": "Specify how you want the output to be structured (e.g., list, code).",
        "constraints": "Define any limits or conditions (e.g., word count, time frame).",
    }
    suggestions = [tips[name] for name, passed in criteria.items() if not passed]
    return score, suggestions, criteria
def get_ai_response(prompt, model):
    """Send *prompt* to the Together chat-completions API and return the reply.

    Any failure (missing key, network error, bad model name) is converted
    into an ``"Error: ..."`` string instead of raising, so the caller can
    render it directly in the UI.
    """
    try:
        together_client = Together(api_key=st.secrets["together_api_key"])
        completion = together_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
        )
        return completion.choices[0].message.content
    except Exception as err:
        return f"Error: {str(err)}"
# Sidebar for configuration: lets the user pick which model previews the prompt.
st.sidebar.header("Configuration")
# Display name -> Together API model identifier (passed to get_ai_response).
model_options = {
    "Mistral 7B": "mistralai/Mistral-7B-Instruct-v0.3",
    "Llama 3 8B": "meta-llama/Llama-3-8B-Instruct",
    "Qwen 72B": "Qwen/Qwen-72B-Chat"
}
selected_model = st.sidebar.selectbox("Select AI Model", list(model_options.keys()))
# Main input area: the prompt to evaluate.
prompt_input = st.text_area("Enter your prompt:", height=150)
# Two-column layout: col1 = evaluation workflow, col2 = static tips.
col1, col2 = st.columns(2)
# --- Left column: evaluate the prompt and optionally preview a response ---
with col1:
    if st.button("Evaluate Prompt", type="primary"):
        if not prompt_input.strip():
            st.error("Please enter a prompt to evaluate.")
        else:
            score, suggestions, criteria = evaluate_prompt(prompt_input)

            # Display evaluation results.
            st.subheader("Prompt Evaluation Results")

            # Score with a color-coded banner.
            if score >= 8:
                st.success(f"Score: {score}/10 - Excellent!")
            elif score >= 6:
                st.info(f"Score: {score}/10 - Good")
            else:
                st.warning(f"Score: {score}/10 - Needs Improvement")

            # Criteria checklist (icons were mojibake in the original source;
            # restored to check/cross marks).
            st.markdown("#### Criteria Checklist")
            for criterion, passed in criteria.items():
                icon = "✅" if passed else "❌"
                st.markdown(f"{icon} **{criterion.capitalize()}**")

            # Improvement suggestions.
            if suggestions:
                st.markdown("#### Suggestions to improve your prompt:")
                for tip in suggestions:
                    st.markdown(f"- {tip}")
            else:
                st.success("Your prompt is well-structured!")

            # Only query the model for reasonably well-formed prompts.
            if score >= 4:
                # SECURITY: never hard-code API keys in source (the original
                # committed a literal Together key here). st.secrets is also
                # read-only at runtime — it cannot be assigned to. The key must
                # come from .streamlit/secrets.toml or the deployment secrets.
                if "together_api_key" not in st.secrets:
                    st.error(
                        "Together API key not configured. Add `together_api_key` "
                        "to .streamlit/secrets.toml to enable response previews."
                    )
                else:
                    with st.spinner("Getting AI response..."):
                        ai_response = get_ai_response(prompt_input, model_options[selected_model])
                    st.subheader("AI Response Preview")
                    st.markdown(ai_response)
            else:
                st.info("Improve your prompt score to see an AI response preview.")
# --- Right column: static prompt-engineering guidance (no interactivity) --
with col2:
    st.subheader("Prompt Engineering Tips")
    st.markdown("""
    ### How to craft effective prompts:
    1. **Be clear and specific** - Clearly state what you want.
    2. **Provide context** - Give background information.
    3. **Specify output format** - Ask for lists, code, or summaries.
    4. **Set constraints** - Define limits like word count.
    5. **Use examples** - Show examples of desired output.
    ### Examples of good prompts:
    - "Create a 5-item bulleted list of healthy breakfast ideas including nutritional benefits."
    - "Write a Python function that sorts a list of integers using the bubble sort algorithm."
    - "Summarize the key benefits of exercise in exactly 100 words for a health newsletter."
    """)
# --- Explanation of the evaluation criteria (collapsible) -----------------
# These descriptions mirror the checks in evaluate_prompt().
st.markdown("---")
with st.expander("Understanding the Evaluation Criteria"):
    st.markdown("""
    - **Clarity**: Does the prompt include clear question words (who, what, where, when, why, how)?
    - **Specificity**: Is the prompt detailed enough (more than 5 words)?
    - **Context**: Does the prompt provide examples or background information?
    - **Output Format**: Does the prompt specify the desired format (list, table, code, etc.)?
    - **Constraints**: Does the prompt include specific limitations or requirements?
    """)

# Footer. (A trailing scrape artifact "|" on the last line of the original
# would have been a syntax error; removed.)
st.markdown("---")
st.markdown("#### About")
st.markdown("This tool helps you create better prompts for AI systems by evaluating them against best practices in prompt engineering.")