File size: 5,918 Bytes
ec9caf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import re
import streamlit as st
from together import Together

# Initialize the page.
# NOTE: st.set_page_config must be the first Streamlit call in the script.
APP_TITLE = "AI Prompt Evaluator"
st.set_page_config(page_title=APP_TITLE, page_icon="🧠", layout="wide")
st.title(APP_TITLE)

def evaluate_prompt(prompt: str) -> tuple[int, list[str], dict[str, bool]]:
    """Score *prompt* against five prompt-engineering heuristics.

    Args:
        prompt: The user-supplied prompt text (may be empty).

    Returns:
        A ``(score, suggestions, criteria)`` tuple where ``score`` is 0-10
        (2 points per criterion met), ``suggestions`` holds one improvement
        tip per failed criterion (in criteria order), and ``criteria`` maps
        each criterion name to whether it passed.
    """
    # Lowercase once up front instead of once per regex search.
    lowered = prompt.lower()

    criteria = {
        # Question words signal the prompt states what is being asked.
        "clarity": bool(re.search(r"\b(who|what|where|when|why|how)\b", lowered)),
        # A bare handful of words is rarely specific enough.
        "specificity": len(prompt.split()) > 5,
        # Example-introducing phrases suggest context was provided.
        "context": bool(re.search(r"\b(for example|such as|like|including)\b", lowered)),
        # Explicit output-shape keywords (list, table, code, ...).
        "output_format": bool(re.search(r"\b(list|table|bullets|code|summary)\b", lowered)),
        # Limiting words indicate constraints were specified.
        "constraints": bool(re.search(r"\b(limit|max|min|exactly|within)\b", lowered)),
    }

    score = sum(criteria.values()) * 2  # 5 criteria x 2 points = max 10

    # One targeted tip for each criterion the prompt failed.
    tips = {
        "clarity": "Make the prompt clearer by specifying exactly what you need.",
        "specificity": "Make the prompt more detailed and specific.",
        "context": "Add some background information to improve relevance.",
        "output_format": "Specify how you want the output to be structured (e.g., list, code).",
        "constraints": "Define any limits or conditions (e.g., word count, time frame).",
    }
    suggestions = [tips[name] for name, passed in criteria.items() if not passed]

    return score, suggestions, criteria

def get_ai_response(prompt, model):
    """Send *prompt* to the Together chat-completions API and return the reply.

    Any failure (missing secret, network error, unknown model, empty choice
    list) is caught and returned as an "Error: ..." string rather than
    raised, so the caller can render it directly in the UI.
    """
    try:
        together_client = Together(api_key=st.secrets["together_api_key"])
        conversation = [{"role": "user", "content": prompt}]
        completion = together_client.chat.completions.create(
            model=model,
            messages=conversation,
            max_tokens=500,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"Error: {str(exc)}"

# Sidebar for configuration
st.sidebar.header("Configuration")
# Display name -> Together model identifier. The identifier (the dict
# value) is what get_ai_response() ultimately sends to the API.
model_options = {
    "Mistral 7B": "mistralai/Mistral-7B-Instruct-v0.3",
    "Llama 3 8B": "meta-llama/Llama-3-8B-Instruct",
    "Qwen 72B": "Qwen/Qwen-72B-Chat"
}
# Holds the chosen display name; look up model_options[selected_model]
# to get the API identifier.
selected_model = st.sidebar.selectbox("Select AI Model", list(model_options.keys()))

# Main input area
prompt_input = st.text_area("Enter your prompt:", height=150)

col1, col2 = st.columns(2)
with col1:
    if st.button("Evaluate Prompt", type="primary"):
        if not prompt_input.strip():
            st.error("Please enter a prompt to evaluate.")
        else:
            score, suggestions, criteria = evaluate_prompt(prompt_input)

            # Display evaluation results
            st.subheader("Prompt Evaluation Results")

            # Score with a color-coded banner.
            if score >= 8:
                st.success(f"Score: {score}/10 - Excellent!")
            elif score >= 6:
                st.info(f"Score: {score}/10 - Good")
            else:
                st.warning(f"Score: {score}/10 - Needs Improvement")

            # Criteria checklist
            st.markdown("#### Criteria Checklist")
            for criterion, passed in criteria.items():
                icon = "✅" if passed else "❌"
                st.markdown(f"{icon} **{criterion.capitalize()}**")

            # Improvement suggestions
            if suggestions:
                st.markdown("#### Suggestions to improve your prompt:")
                for tip in suggestions:
                    st.markdown(f"- {tip}")
            else:
                st.success("Your prompt is well-structured!")

            # The AI preview needs an API key. Never hard-code the key in
            # source: st.secrets is read-only at runtime, and a committed
            # key is a credential leak. Require it via .streamlit/secrets.toml.
            if "together_api_key" not in st.secrets:
                st.warning(
                    "Add `together_api_key` to your Streamlit secrets "
                    "to enable the AI response preview."
                )
            elif score >= 4:  # Only preview reasonably-scored prompts.
                with st.spinner("Getting AI response..."):
                    ai_response = get_ai_response(prompt_input, model_options[selected_model])

                st.subheader("AI Response Preview")
                st.markdown(ai_response)
            else:
                st.info("Improve your prompt score to see an AI response preview.")

with col2:
    st.subheader("Prompt Engineering Tips")
    # Static reference material shown beside the evaluator results.
    tips_markdown = """
    ### How to craft effective prompts:
    
    1. **Be clear and specific** - Clearly state what you want.
    2. **Provide context** - Give background information.
    3. **Specify output format** - Ask for lists, code, or summaries.
    4. **Set constraints** - Define limits like word count.
    5. **Use examples** - Show examples of desired output.
    
    ### Examples of good prompts:
    
    - "Create a 5-item bulleted list of healthy breakfast ideas including nutritional benefits."
    - "Write a Python function that sorts a list of integers using the bubble sort algorithm."
    - "Summarize the key benefits of exercise in exactly 100 words for a health newsletter."
    """
    st.markdown(tips_markdown)

# Collapsible explanation of how evaluate_prompt() scores each criterion.
st.markdown("---")
with st.expander("Understanding the Evaluation Criteria"):
    criteria_help = """
    - **Clarity**: Does the prompt include clear question words (who, what, where, when, why, how)?
    - **Specificity**: Is the prompt detailed enough (more than 5 words)?
    - **Context**: Does the prompt provide examples or background information?
    - **Output Format**: Does the prompt specify the desired format (list, table, code, etc.)?
    - **Constraints**: Does the prompt include specific limitations or requirements?
    """
    st.markdown(criteria_help)

# Footer
st.markdown("---")
st.markdown("#### About")
st.markdown("This tool helps you create better prompts for AI systems by evaluating them against best practices in prompt engineering.")