# AI Prompt Evaluator — Streamlit app.
# Scores a user prompt against prompt-engineering best practices and
# previews an AI response via the Together API.
import re

import streamlit as st
from together import Together

# Initialize the page.
# page_icon restored to the brain emoji — the source had the mojibake "π§ "
# (UTF-8 bytes of U+1F9E0 mis-decoded as Windows-1253).
st.set_page_config(page_title="AI Prompt Evaluator", page_icon="🧠", layout="wide")
st.title("AI Prompt Evaluator")
def evaluate_prompt(prompt):
    """Evaluate a prompt against five prompt-engineering criteria.

    Each satisfied criterion is worth 2 points, for a 0-10 score.

    Returns:
        (score, suggestions, criteria): the integer score, a list of
        improvement tips for the failed criteria (in fixed order), and
        the per-criterion boolean dict.
    """
    lowered = prompt.lower()  # lowercase once; every regex check reuses it

    criteria = {
        "clarity": re.search(r"\b(who|what|where|when|why|how)\b", lowered) is not None,
        "specificity": len(prompt.split()) > 5,
        "context": re.search(r"\b(for example|such as|like|including)\b", lowered) is not None,
        "output_format": re.search(r"\b(list|table|bullets|code|summary)\b", lowered) is not None,
        "constraints": re.search(r"\b(limit|max|min|exactly|within)\b", lowered) is not None,
    }

    score = sum(criteria.values()) * 2  # Scale to 10 points

    # One tip per criterion; dict order matches the original suggestion order.
    tips = {
        "clarity": "Make the prompt clearer by specifying exactly what you need.",
        "specificity": "Make the prompt more detailed and specific.",
        "context": "Add some background information to improve relevance.",
        "output_format": "Specify how you want the output to be structured (e.g., list, code).",
        "constraints": "Define any limits or conditions (e.g., word count, time frame).",
    }
    suggestions = [tip for name, tip in tips.items() if not criteria[name]]

    return score, suggestions, criteria
def get_ai_response(prompt, model):
    """Send *prompt* to the Together chat API and return the reply text.

    The API key is read from Streamlit secrets. Any failure (network,
    auth, bad model id) is reported as an "Error: ..." string rather
    than raised, so the UI can render it directly.
    """
    try:
        api = Together(api_key=st.secrets["together_api_key"])
        completion = api.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"Error: {str(exc)}"
# Sidebar for configuration: pick which Together-hosted model answers.
st.sidebar.header("Configuration")

# Display label -> Together model identifier.
model_options = {
    "Mistral 7B": "mistralai/Mistral-7B-Instruct-v0.3",
    "Llama 3 8B": "meta-llama/Llama-3-8B-Instruct",
    "Qwen 72B": "Qwen/Qwen-72B-Chat",
}
selected_model = st.sidebar.selectbox("Select AI Model", list(model_options.keys()))
# Main input area
prompt_input = st.text_area("Enter your prompt:", height=150)
col1, col2 = st.columns(2)

with col1:
    if st.button("Evaluate Prompt", type="primary"):
        if not prompt_input.strip():
            st.error("Please enter a prompt to evaluate.")
        else:
            # SECURITY FIX: the original code embedded a live API key in
            # source and assigned it into st.secrets. That leaked the
            # secret to anyone with repo access, and st.secrets is
            # read-only at runtime, so the assignment raised TypeError
            # anyway. The key must come from .streamlit/secrets.toml
            # (or the Spaces secrets UI); we only check for its presence.
            score, suggestions, criteria = evaluate_prompt(prompt_input)

            # Display evaluation results
            st.subheader("Prompt Evaluation Results")

            # Score with color-coded meter
            if score >= 8:
                st.success(f"Score: {score}/10 - Excellent!")
            elif score >= 6:
                st.info(f"Score: {score}/10 - Good")
            else:
                st.warning(f"Score: {score}/10 - Needs Improvement")

            # Criteria checklist (icons restored from mojibake: ✅/❌)
            st.markdown("#### Criteria Checklist")
            for criterion, passed in criteria.items():
                icon = "✅" if passed else "❌"
                st.markdown(f"{icon} **{criterion.capitalize()}**")

            # Improvement suggestions
            if suggestions:
                st.markdown("#### Suggestions to improve your prompt:")
                for tip in suggestions:
                    st.markdown(f"- {tip}")
            else:
                st.success("Your prompt is well-structured!")

            # Get AI response if score is high enough
            if score >= 4:  # Only get response if score is decent
                if "together_api_key" in st.secrets:
                    with st.spinner("Getting AI response..."):
                        ai_response = get_ai_response(prompt_input, model_options[selected_model])
                        st.subheader("AI Response Preview")
                        st.markdown(ai_response)
                else:
                    st.error(
                        "No `together_api_key` found in Streamlit secrets. "
                        "Add it to `.streamlit/secrets.toml` to enable the AI response preview."
                    )
            else:
                st.info("Improve your prompt score to see an AI response preview.")
# Static reference content for the right-hand column.
PROMPT_TIPS_MD = """
### How to craft effective prompts:
1. **Be clear and specific** - Clearly state what you want.
2. **Provide context** - Give background information.
3. **Specify output format** - Ask for lists, code, or summaries.
4. **Set constraints** - Define limits like word count.
5. **Use examples** - Show examples of desired output.

### Examples of good prompts:
- "Create a 5-item bulleted list of healthy breakfast ideas including nutritional benefits."
- "Write a Python function that sorts a list of integers using the bubble sort algorithm."
- "Summarize the key benefits of exercise in exactly 100 words for a health newsletter."
"""

with col2:
    st.subheader("Prompt Engineering Tips")
    st.markdown(PROMPT_TIPS_MD)
# Explain how the score above was computed; collapsed by default.
st.markdown("---")
with st.expander("Understanding the Evaluation Criteria"):
    st.markdown(
        """
- **Clarity**: Does the prompt include clear question words (who, what, where, when, why, how)?
- **Specificity**: Is the prompt detailed enough (more than 5 words)?
- **Context**: Does the prompt provide examples or background information?
- **Output Format**: Does the prompt specify the desired format (list, table, code, etc.)?
- **Constraints**: Does the prompt include specific limitations or requirements?
"""
    )

# Footer
st.markdown("---")
st.markdown("#### About")
st.markdown(
    "This tool helps you create better prompts for AI systems by evaluating "
    "them against best practices in prompt engineering."
)