ChangranHuuu committed on
Commit
b43abc8
·
verified ·
1 Parent(s): 52c87e7

Upload 12 files

Browse files
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Seed SAMBANOVA_BASE_URL before dynamic_cheatsheet.language_model is imported.
# The value is normally only read when an API call is made, so doing it this
# early is a safeguard rather than a hard requirement.
SAMBANOVA_DEFINED_BASE_URL = "https://api.sambanova.ai/v1"
_configured_base_url = os.environ.get("SAMBANOVA_BASE_URL")
if _configured_base_url is None:
    os.environ["SAMBANOVA_BASE_URL"] = SAMBANOVA_DEFINED_BASE_URL
    print(f"SAMBANOVA_BASE_URL environment variable set to: {SAMBANOVA_DEFINED_BASE_URL}")
elif _configured_base_url != SAMBANOVA_DEFINED_BASE_URL:
    # A pre-existing value always wins; we only tell the operator about the mismatch.
    print(f"Warning: SAMBANOVA_BASE_URL environment variable is already set to {_configured_base_url}, but app expects {SAMBANOVA_DEFINED_BASE_URL}. Using the existing one.")

import gradio as gr
import sys

# Add the project root to the Python path to allow importing dynamic_cheatsheet
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))

from dynamic_cheatsheet.language_model import LanguageModel

# --- Configuration ---
SAMBANOVA_API_KEY = os.environ.get("SAMBANOVA_API_KEY")
# The "samba/" prefix is how LanguageModel recognizes a SambaNova model.
SAMBANOVA_MODEL_NAME = "samba/DeepSeek-R1-Distill-Llama-70B"

# Prompt paths are relative to the current working directory.
GENERATOR_PROMPT_PATH = "prompts/generator_prompt.txt"
CURATOR_PROMPT_PATH = "prompts/curator_prompt_for_dc_cumulative.txt"

# Minimal built-in prompts used when the corresponding prompt file is missing.
_FALLBACK_GENERATOR_PROMPT = "You are a helpful assistant. Given a question and a cheatsheet, provide an answer. Cheatsheet: [[CHEATSHEET]] Question: [[QUESTION]] FINAL ANSWER: <answer></answer>"
_FALLBACK_CURATOR_PROMPT = "You are a helpful assistant. Given a question, a model answer, and a previous cheatsheet, update the cheatsheet. Previous Cheatsheet: [[PREVIOUS_CHEATSHEET]] Question: [[QUESTION]] Model Answer: [[MODEL_ANSWER]] NEW CHEATSHEET: <cheatsheet></cheatsheet>"


def _load_prompt(path, fallback):
    """Return the text of the prompt file at ``path``, or ``fallback`` if it does not exist.

    Each prompt is loaded independently so that one missing file does not
    discard the other prompt that was read successfully.
    """
    try:
        # Explicit encoding: the platform default is not guaranteed to be UTF-8.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"Error: Prompt file not found at {path}. Using the built-in fallback prompt instead.")
        return fallback


GENERATOR_PROMPT = _load_prompt(GENERATOR_PROMPT_PATH, _FALLBACK_GENERATOR_PROMPT)
CURATOR_PROMPT = _load_prompt(CURATOR_PROMPT_PATH, _FALLBACK_CURATOR_PROMPT)

# --- Global variable for cheatsheet ---
# Shared between the "generate" and "test inference" tabs; "(empty)" means no cheatsheet yet.
current_cheatsheet_cache = "(empty)"
44
+
45
def initialize_model():
    """Build the LanguageModel used for every request in this app.

    Returns:
        LanguageModel: a model bound to ``SAMBANOVA_MODEL_NAME``.

    Raises:
        gr.Error: if ``SAMBANOVA_API_KEY`` was not set when the module was loaded.
    """
    if SAMBANOVA_API_KEY:
        # LanguageModel reads the API key and base URL from the environment itself.
        return LanguageModel(model_name=SAMBANOVA_MODEL_NAME)
    raise gr.Error("SAMBANOVA_API_KEY environment variable not set. Please set it in your Hugging Face Space secrets or local environment.")
53
+
54
def generate_cheatsheet_func(training_data_text, progress=gr.Progress(track_tqdm=True)):
    """Build a cumulative cheatsheet from newline-separated training examples.

    Each non-blank line is sent through the "DynamicCheatsheet_Cumulative"
    approach, feeding the cheatsheet produced so far into the next example.
    The result is stored in the module-level ``current_cheatsheet_cache``.

    Args:
        training_data_text: Raw textbox content, one example per line.
        progress: Gradio progress tracker (supplied by Gradio).

    Returns:
        str: The final cheatsheet, or a notice when the input is empty
        (in which case the cached cheatsheet is reset to "(empty)").
    """
    global current_cheatsheet_cache
    if not training_data_text or not training_data_text.strip():
        current_cheatsheet_cache = "(empty)"
        return "Training data is empty. Cheatsheet reset to (empty)."

    model = initialize_model()

    training_examples = [ex.strip() for ex in training_data_text.split("\n") if ex.strip()]
    total_examples = len(training_examples)

    cheatsheet_content = "(empty)"

    progress(0, desc="Initializing Cheatsheet Generation")
    for position, example_input in enumerate(progress.tqdm(training_examples, desc="Generating Cheatsheet"), start=1):
        print(f"Processing training example {position}/{total_examples}: {example_input[:50]}...")
        try:
            results_dict = model.advanced_generate(
                approach_name="DynamicCheatsheet_Cumulative",
                input_txt=example_input,
                cheatsheet=cheatsheet_content,
                generator_template=GENERATOR_PROMPT,
                cheatsheet_template=CURATOR_PROMPT,
                temperature=0.1,
                max_tokens=1024,
            )
        except Exception as e:
            # Best effort: one failing example must not abort the whole run.
            print(f"Error processing example '{example_input[:50]}...': {e}")
            gr.Warning(f"Error on example '{example_input[:30]}...': {e}. Skipping this example.")
            continue
        # Keep the previous cheatsheet when the model yields nothing usable;
        # a None here would break the template substitution for the next example.
        cheatsheet_content = results_dict.get("final_cheatsheet") or cheatsheet_content

    current_cheatsheet_cache = cheatsheet_content
    return current_cheatsheet_cache
87
+
88
def _run_inference(model, user_query, approach_name, cheatsheet, label):
    """Run one inference pass and return the answer text, or an error message on failure.

    ``label`` ("with cheatsheet" / "without cheatsheet") is only used in the
    log line and in the error text shown to the user.
    """
    try:
        results = model.advanced_generate(
            approach_name=approach_name,
            input_txt=user_query,
            cheatsheet=cheatsheet,
            generator_template=GENERATOR_PROMPT,
            cheatsheet_template=CURATOR_PROMPT,
            temperature=0.1,
            max_tokens=512,
        )
    except Exception as e:
        # Surface the failure in the UI instead of crashing the request.
        print(f"Error ({label}): {e}")
        return f"Error during inference {label}: {e}"
    return results.get("final_answer", "Error: Could not extract answer.")


def get_answers_func(user_query):
    """Answer ``user_query`` twice: once with the cached cheatsheet and once without.

    Args:
        user_query: Raw textbox content.

    Returns:
        tuple[str, str]: (answer with cheatsheet, answer without cheatsheet).
        Each element is an error message if that pass failed.
    """
    if not user_query or not user_query.strip():
        return "Query is empty.", "Query is empty."

    model = initialize_model()

    print(f"Querying WITH cheatsheet ({current_cheatsheet_cache[:50]}...)")
    answer_with_cheatsheet = _run_inference(
        model,
        user_query,
        "DynamicCheatsheet_Cumulative",
        current_cheatsheet_cache,
        "with cheatsheet",
    )

    # The "default" approach builds the same generator prompt with the
    # cheatsheet replaced by "(empty)" but skips the curator call, whose
    # output would be thrown away here anyway.
    print("Querying WITHOUT cheatsheet...")
    answer_without_cheatsheet = _run_inference(
        model,
        user_query,
        "default",
        "(empty)",
        "without cheatsheet",
    )

    return answer_with_cheatsheet, answer_without_cheatsheet
132
+
133
+ # --- Gradio Interface ---
134
# Long user-facing texts are kept together so the layout code below stays readable.
_INTRO_MD = "Demonstrates the effect of using a dynamically generated cheatsheet (Task Caching) on model inference. Uses SambaNova API via `litellm`."
_TRAINING_MD = "Paste your training data below, one example per line. This data will be used to build a cumulative cheatsheet. The process may take some time depending on the number of examples."
_INFERENCE_MD = "Enter your query below. The model will attempt to answer it twice: once using the generated cheatsheet (if any), and once without it."
_SETUP_NOTE_MD = "**Important:** Ensure `SAMBANOVA_API_KEY` is set as a secret in your Hugging Face Space or as an environment variable if running locally. `SAMBANOVA_BASE_URL` is set to `https://api.sambanova.ai/v1` by default if not found in environment."

with gr.Blocks(title="Task Caching Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Task Caching Demo")
    gr.Markdown(_INTRO_MD)

    with gr.Tabs():
        # Tab 1: build the cheatsheet from pasted training examples.
        with gr.TabItem("1. Generate Cheatsheet (Task Caching)"):
            gr.Markdown(_TRAINING_MD)
            training_data_input = gr.Textbox(
                label="Training Data",
                lines=10,
                placeholder="Example 1: What is the capital of France?\nExample 2: Solve 2+2.",
            )
            generate_cheatsheet_button = gr.Button("Generate Cheatsheet (Task Caching)", variant="primary")
            cheatsheet_output = gr.Textbox(
                label="Generated Cheatsheet",
                lines=15,
                interactive=False,
                show_label=True,
            )
            generate_cheatsheet_button.click(
                fn=generate_cheatsheet_func,
                inputs=training_data_input,
                outputs=cheatsheet_output,
                show_progress="full",
            )

        # Tab 2: compare answers with and without the cached cheatsheet.
        with gr.TabItem("2. Test Inference"):
            gr.Markdown(_INFERENCE_MD)
            query_input = gr.Textbox(
                label="Your Query",
                lines=3,
                placeholder="e.g., What is the solution to 5 6 6 8 in the Game of 24?",
            )
            get_answers_button = gr.Button("Get Answers", variant="primary")

            with gr.Row():
                answer_with_cheatsheet_output = gr.Textbox(
                    label="Answer WITH Cheatsheet",
                    lines=10,
                    interactive=False,
                    show_label=True,
                )
                answer_without_cheatsheet_output = gr.Textbox(
                    label="Answer WITHOUT Cheatsheet",
                    lines=10,
                    interactive=False,
                    show_label=True,
                )

            get_answers_button.click(
                fn=get_answers_func,
                inputs=query_input,
                outputs=[answer_with_cheatsheet_output, answer_without_cheatsheet_output],
            )

    gr.Markdown(_SETUP_NOTE_MD)

if __name__ == "__main__":
    # Warn early; the UI still starts so the message is also visible on first use.
    if not SAMBANOVA_API_KEY:
        for notice in (
            "Warning: SAMBANOVA_API_KEY is not set. The application will likely fail to contact the SambaNova API.",
            "Please set the SAMBANOVA_API_KEY environment variable.",
        ):
            print(notice)
    demo.launch()
173
+
dynamic_cheatsheet/.DS_Store ADDED
Binary file (6.15 kB). View file
 
dynamic_cheatsheet/__init__.py ADDED
File without changes
dynamic_cheatsheet/language_model.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import tiktoken
3
+ from typing import List, Tuple
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ from .utils.execute_code import extract_and_run_python_code
6
+ from .utils.extractor import extract_answer, extract_cheatsheet
7
+ from litellm import completion
8
+ from functools import partial
9
+ import os # Added for SAMBANOVA env vars
10
+
11
class LanguageModel:
    """
    Wrapper around ``litellm.completion`` that adds optional multi-round code
    execution (``generate``) and cheatsheet-based workflows (``advanced_generate``).
    """

    # Model identifiers that are handed to litellm unchanged, without a warning.
    KNOWN_MODELS = (
        "openai/gpt-4o-mini", "openai/gpt-4o-mini-2024-07-18",
        "openai/gpt-4o", "openai/gpt-4o-2024-08-06", "openai/gpt-4o-2024-11-20",
        "openai/gpt-3.5-turbo",
        "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "openai/o3-mini", "openai/o3-mini-2025-01-31",
        "openai/o1", "openai/o1-2024-12-17",
        "anthropic/claude-3-5-sonnet-latest", "anthropic/claude-3-5-sonnet-20241022",
        "anthropic/claude-3-5-haiku-latest", "anthropic/claude-3-5-haiku-20241022",
        "anthropic/claude-3-7-sonnet-latest", "anthropic/claude-3-7-sonnet-20250219",
        "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
        "together_ai/deepseek-ai/DeepSeek-R1",
        "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
        "together_ai/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "together_ai/Qwen/Qwen2.5-Coder-32B-Instruct",
        "together_ai/Qwen/QwQ-32B",
        "together_ai/Qwen/Qwen2-72B-Instruct",
        "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo",
        "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo",
        "gemini/gemini-2.0-flash",
        "ollama/llama3:70b",
    )

    def __init__(self,
        model_name: str,
    ) -> None:
        """
        Bind a completion client for the requested model.

        Arguments:
            model_name : str : The name of the language model to use. Names starting
                with "samba/" are routed to the SambaNova endpoint; any other name is
                passed to litellm as-is (with a warning if it is not in KNOWN_MODELS).

        Raises:
            ValueError : If a "samba/" model is requested and SAMBANOVA_API_KEY is not set.
        """
        self.model_name = model_name

        if self.model_name.startswith("samba/"):
            samba_api_key = os.environ.get("SAMBANOVA_API_KEY")
            samba_base_url = os.environ.get("SAMBANOVA_BASE_URL", "https://api.sambanova.ai/v1")  # Default if not set
            if not samba_api_key:
                raise ValueError("SAMBANOVA_API_KEY environment variable not set for SambaNova model.")
            # Per the original author's notes, litellm did not accept "samba/" as a
            # provider prefix. The endpoint is therefore addressed as a generic
            # OpenAI-compatible provider, and the prefix is stripped so that only the
            # bare model id is sent in the request.
            actual_model_name = self.model_name.split("samba/", 1)[1]
            self.client = partial(
                completion,
                model=actual_model_name,
                api_key=samba_api_key,
                api_base=samba_base_url,
                custom_llm_provider="openai",
            )
            print(f"Initialized SambaNova model '{actual_model_name}' via custom OpenAI provider settings with api_base: {samba_base_url}")
        else:
            if self.model_name not in self.KNOWN_MODELS:
                # Building the partial cannot fail; an unsupported name only
                # surfaces once the first request is made.
                print(f"Warning: Model '{self.model_name}' not in explicit list and does not start with recognized prefixes. Attempting to initialize with litellm directly.")
            self.client = partial(completion, model=self.model_name)

        # Token counts always use the gpt-4o tokenizer, whatever model is selected.
        self.gpt4Tokenizer = tiktoken.encoding_for_model("gpt-4o")

    def count_tokens(self, text: str) -> int:
        """
        Count the number of tokens in the text (using the gpt-4o tokenizer).
        """
        return len(self.gpt4Tokenizer.encode(text))

    def generate(self,
        history: List[dict],
        temperature: float = 0.1,
        max_tokens: int = 2048,
        current_depth: int = 1,
        max_depth_num_rounds: int = 3,
        allow_code_execution: bool = True,
        code_execution_flag: str = "EXECUTE CODE!",
        final_output: str = ""
    ) -> str:
        """
        Generate a response from the language model, optionally running code it emits.

        When the reply contains ``code_execution_flag`` directly after a closed code
        fence, the code before the flag is executed, its output is appended to the
        conversation, and the model is asked to continue. This repeats until the model
        stops requesting execution or ``max_depth_num_rounds`` is exceeded.

        Arguments:
            history : List[dict] : Chat messages ({"role": ..., "content": ...}). Not modified.
            temperature : float : Sampling temperature forwarded to the client.
            max_tokens : int : Completion token limit forwarded to the client.
            current_depth : int : Current round number (1-based); used by the recursion.
            max_depth_num_rounds : int : Number of follow-up rounds allowed after code execution.
            allow_code_execution : bool : Whether code execution requests are honored.
            code_execution_flag : str : Marker the model emits to request execution.
            final_output : str : Text accumulated in earlier rounds; used by the recursion.

        Returns:
            str : The accumulated model output, including any code execution results.

        Raises:
            ValueError : If ``history`` is empty.
        """
        if len(history) == 0:
            raise ValueError("History must contain at least one message.")

        # self.client already carries the model (and, for SambaNova, the credentials).
        response = self.client(
            messages=history,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        # Some providers return None content (e.g. on filtered replies); treat it as empty.
        output = response.choices[0].message.content or ""

        output_prefix = output.split(code_execution_flag)[0].strip()
        wants_execution = (
            allow_code_execution
            and code_execution_flag in output
            and output_prefix.endswith("```")
        )
        if not wants_execution:
            return f"{final_output}\n\n{output}".strip()

        # The helper returns None when it finds no ```python block.
        executed_code = (extract_and_run_python_code(output_prefix) or "").strip()
        current_output = f"{output_prefix}\n{code_execution_flag}\n\n{executed_code}"
        final_output = f"{final_output}\n\n{current_output}".strip()

        if current_depth > max_depth_num_rounds:
            return final_output

        warning_txt = ""
        if current_depth == max_depth_num_rounds:
            warning_txt = " (This is the last round. No more code execution will be allowed. Please present your final solution now.)"
        follow_up_messages = [
            {"role": "assistant", "content": current_output},
            {"role": "user", "content": f"Proceed with any additional steps required and provide the completed solution. If everything is already complete, type FINAL ANSWER and submit it in the expected format. If you are stuck, please try alternative methods to solve the problem and provide the final solution.{warning_txt}"}
        ]
        # Build a new list so the caller's history is left untouched.
        return self.generate(
            history=list(history) + follow_up_messages,
            temperature=temperature,
            max_tokens=max_tokens,
            current_depth=current_depth+1,
            max_depth_num_rounds=max_depth_num_rounds,
            allow_code_execution=allow_code_execution,
            code_execution_flag=code_execution_flag,
            final_output=final_output,
        )

    def _run_generator(self,
        generator_prompt: str,
        temperature: float,
        max_tokens: int,
        allow_code_execution: bool,
        code_execution_flag: str,
    ) -> tuple:
        """Send one generator prompt and return (full output, extracted answer)."""
        generator_output = self.generate(
            history=[{"role": "user", "content": generator_prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
            allow_code_execution=allow_code_execution,
            code_execution_flag=code_execution_flag,
        )
        return generator_output, extract_answer(generator_output)

    def advanced_generate(self,
        approach_name: str,
        input_txt: str,
        cheatsheet: str = None,
        generator_template: str = None,
        cheatsheet_template: str = None,
        temperature: float = 0.0,
        max_tokens: int = 2048,
        max_num_rounds: int = 1,
        allow_code_execution: bool = True,
        code_execution_flag: str = "EXECUTE CODE!",
        add_previous_answers_to_cheatsheet: bool = True,
        original_input_corpus: List[str] = None,
        original_input_embeddings: np.ndarray = None,
        generator_outputs_so_far: List[str] = None,
        retrieve_top_k: int = 3,
    ) -> dict:
        """
        Answer ``input_txt`` using one of the supported approaches.

        Approaches:
            "default" : Generator only; the cheatsheet placeholder is filled with "(empty)".
            "DynamicCheatsheet_Cumulative" : Generator followed by a curator call that
                rewrites the cheatsheet, repeated for ``max_num_rounds`` rounds (at least one).
            "FullHistoryAppending" : Generator only; the cheatsheet is the concatenation
                of earlier inputs and model outputs.

        ``original_input_embeddings`` and ``retrieve_top_k`` are accepted but not used
        by the approaches implemented here.

        Returns:
            dict : Keys "input_txt", "steps", "previous_answers", "final_answer",
            "final_cheatsheet" and "final_output".

        Raises:
            ValueError : If ``approach_name`` is unknown or a required template or
            cheatsheet is missing.
        """
        if approach_name == "default":
            if generator_template is None:
                raise ValueError("Generator template must be provided for default approach.")
            generator_prompt = generator_template.replace("[[QUESTION]]", input_txt).replace("[[CHEATSHEET]]", "(empty)")
            generator_output, generator_answer = self._run_generator(
                generator_prompt, temperature, max_tokens, allow_code_execution, code_execution_flag
            )
            return {
                "input_txt": input_txt,
                "steps": [
                    {
                        "round": 0,
                        "generator_prompt": generator_prompt,
                        "generator_output": generator_output,
                        "generator_answer": generator_answer,
                        "current_cheatsheet": None,
                        "new_cheatsheet": None,
                    }
                ],
                "previous_answers": None,
                "final_answer": generator_answer,
                "final_output": generator_output,
                "final_cheatsheet": None,
            }

        if approach_name == "DynamicCheatsheet_Cumulative":
            if cheatsheet is None:
                raise ValueError("Cheatsheet must be provided for DynamicCheatsheet_Cumulative approach.")
            if generator_template is None or cheatsheet_template is None:
                raise ValueError("Generator and Cheatsheet templates must be provided for DynamicCheatsheet_Cumulative approach.")

            steps = []
            previous_answers = []
            current_cheatsheet_in_round = cheatsheet

            # max(1, ...) guarantees the loop body runs, so the names used in the
            # return statement below are always bound.
            for round_num in range(max(1, max_num_rounds)):
                generator_cheatsheet_content = current_cheatsheet_in_round
                if round_num > 0 and add_previous_answers_to_cheatsheet and previous_answers:
                    previous_answers_txt = f"PREVIOUS ANSWERS:\n{'; '.join(previous_answers)}"
                    generator_cheatsheet_content = f"{generator_cheatsheet_content}\n\n{previous_answers_txt}"

                generator_prompt = generator_template.replace("[[QUESTION]]", input_txt).replace("[[CHEATSHEET]]", generator_cheatsheet_content)
                generator_output, generator_answer = self._run_generator(
                    generator_prompt, temperature, max_tokens, allow_code_execution, code_execution_flag
                )

                # The curator sees the full generator output, not just the extracted answer.
                cheatsheet_prompt = (
                    cheatsheet_template
                    .replace("[[QUESTION]]", input_txt)
                    .replace("[[MODEL_ANSWER]]", generator_output)
                    .replace("[[PREVIOUS_CHEATSHEET]]", current_cheatsheet_in_round)
                )
                # The curator uses this same model instance and never executes code.
                cheatsheet_model_output = self.generate(
                    history=[{"role": "user", "content": cheatsheet_prompt}],
                    temperature=temperature,
                    max_tokens=2*max_tokens,
                    allow_code_execution=False,
                )
                new_cheatsheet = extract_cheatsheet(response=cheatsheet_model_output, old_cheatsheet=current_cheatsheet_in_round)

                steps.append({
                    "round": round_num,
                    "generator_prompt": generator_prompt,
                    "generator_output": generator_output,
                    "generator_answer": generator_answer,
                    "current_cheatsheet": current_cheatsheet_in_round,
                    "new_cheatsheet": new_cheatsheet,
                })
                current_cheatsheet_in_round = new_cheatsheet
                if generator_answer:
                    previous_answers.append(f"Round {round_num+1}: {generator_answer}")

            return {
                "input_txt": input_txt,
                "steps": steps,
                "previous_answers": previous_answers,
                "final_answer": generator_answer,  # Answer from the last round
                "final_cheatsheet": current_cheatsheet_in_round,  # Cheatsheet from the last round
                "final_output": generator_output,  # Full output from the last generator call
            }

        if approach_name == "FullHistoryAppending":
            if generator_template is None:
                raise ValueError("Generator template must be provided for FullHistoryAppending approach.")
            length_of_history = len(generator_outputs_so_far) if generator_outputs_so_far else 0
            curated_cheatsheet = "(empty)"
            if length_of_history > 0 and original_input_corpus and generator_outputs_so_far:
                solved_pairs = zip(original_input_corpus[:length_of_history], generator_outputs_so_far[:length_of_history])
                entries = [
                    f"#### Previous Input #{i+1}:\n\n{prev_input}\n\n#### Model Solution to Previous Input #{i+1}:\n\n{prev_output}\n---\n---\n\n"
                    for i, (prev_input, prev_output) in enumerate(solved_pairs)
                ]
                curated_cheatsheet = "### PREVIOUS SOLUTIONS (START)\n\n" + "".join(entries) + "#### PREVIOUS SOLUTIONS (END)"

            generator_prompt = generator_template.replace("[[QUESTION]]", input_txt).replace("[[CHEATSHEET]]", curated_cheatsheet)
            generator_output, generator_answer = self._run_generator(
                generator_prompt, temperature, max_tokens, allow_code_execution, code_execution_flag
            )
            return {
                "input_txt": input_txt,
                "steps": [],
                "previous_answers": [],
                "final_answer": generator_answer,
                "final_cheatsheet": curated_cheatsheet,
                "final_output": generator_output,
            }

        raise ValueError(f"Unknown approach_name: {approach_name}")
298
+
dynamic_cheatsheet/utils/__init__.py ADDED
File without changes
dynamic_cheatsheet/utils/evaluation.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import re
3
+ import os
4
+ from typing import List
5
+ # from .sonnet_eval import sonnet_errors
6
+ from .execute_code import execute_code_with_timeout
7
+
8
+
9
def clean_output_for_arithmetic(output: str) -> str:
    """
    Clean the output for arithmetic problems.

    Strips everything up to a result marker ("=", " is equal to", " is",
    " equals", " evaluates to") so that only the value remains. Markers are
    applied in sequence, each one to the result of the previous one.

    Args:
        output (str): The output to clean.

    Returns:
        str: The cleaned output.
    """
    # " is equal to" must be tried before " is": otherwise " is" consumes the
    # phrase first and leaves "equal to <value>" behind.
    for marker in ("=", " is equal to", " is", " equals", " evaluates to"):
        if marker in output:
            output = output.split(marker)[1].strip()
    return output
30
+
31
+
32
def clean_output_for_GameOf24(output: str) -> str:
    """
    Clean the output for GameOf24 problems.

    Reduces a sentence such as "The answer is (1 + 2 + 3) * 4 = 24" to the bare
    expression by dropping the right-hand side of "=", any lead-in up to the
    word "is", and trailing "equals ..." / "evaluates to ..." phrases.

    Args:
        output (str): The output to clean.

    Returns:
        str: The expression part of the output.
    """
    if "=" in output:
        output = output.split("=")[0].strip()
    # Match "is" as a whole word only; a plain substring test also fires on
    # words like "This" and would cut the expression away.
    segments = re.split(r"\bis\b", output)
    if len(segments) > 1:
        output = segments[1].strip()
    if "equals" in output:
        output = output.split("equals")[0].strip()
    if "evaluates to" in output:
        output = output.split("evaluates to")[0].strip()
    return output
45
+
46
+
47
def eval_for_GameOf24(input: str, output: str) -> bool:
    """
    Given an input and output, check if the output is correct and follows the rules of the game.

    The output must evaluate to 24 (within 1e-3) and use exactly the numbers
    given in the input, each as often as it appears there.

    Args:
        input (str): The puzzle numbers separated by whitespace, e.g. "4 4 6 8".
        output (str): The model's answer; may contain surrounding prose.

    Returns:
        bool: True if the expression is a valid solution, False otherwise.
    """
    expression = clean_output_for_GameOf24(output)
    for symbol, replacement in (("x", "*"), ("×", "*"), ("÷", "/")):
        expression = expression.replace(symbol, replacement).strip()

    # SECURITY: the expression comes from a model and is passed to eval().
    # Only digits, the four operators, parentheses, dots and whitespace are let
    # through. Anything else could never pass the number check below anyway.
    if re.fullmatch(r"[0-9+\-*/().\s]+", expression) is None:
        return False
    # "**" and "//" are not among the game's operations, and "**" can make
    # eval() arbitrarily expensive.
    if "**" in expression or "//" in expression:
        return False

    try:
        value = eval(expression, {"__builtins__": {}}, {})
        if not (abs(value - 24) < 1e-3):
            return False
    except Exception:
        # Syntax errors, division by zero, non-numeric results, ...
        return False

    # Compare the multiset of numbers used with the numbers given.
    # split() without arguments tolerates repeated or trailing whitespace.
    input_digits = sorted(input.split())
    output_digits = sorted(re.sub(r"[+\-*/()]", " ", expression).split())
    return input_digits == output_digits
82
+
83
+
84
def remove_punctuation(output: str) -> str:
    """
    Remove punctuation from the output.

    The characters removed are: comma, semicolon, colon, period and double quote.
    """
    # A single translate pass deletes all five characters.
    return output.translate(str.maketrans("", "", ',;:."'))
92
+
93
+
94
def convert_newline_to_space(output: str) -> str:
    """
    Convert newline to space.

    Every "\\n" character becomes exactly one space; other whitespace is untouched.
    """
    return " ".join(output.split("\n"))
100
+
101
+
102
def eval_for_exact_matching_with_no_punctuation(
    output: str, target: str
) -> bool:
    """
    Evaluate if the output is exactly the same as the target.

    Only the output is normalized (punctuation removed, newlines turned into
    spaces); the target is compared as given.
    """
    normalized_output = convert_newline_to_space(remove_punctuation(output))
    return target == normalized_output
113
+
114
+
115
def eval_for_softmatch(input: str, output: str, target: str) -> bool:
    """
    Evaluate if the output is a soft match of the target.

    A soft match means the target occurs as a substring of the output once
    punctuation has been removed from the output. ``input`` is not used.
    """
    return target in remove_punctuation(output)
123
+
124
+
125
def eval_for_CheckmateInOne(input: str, output: str, target: str) -> bool:
    """
    Evaluate if the output is a checkmate in one.

    Args:
        input (str): The move list so far, e.g. "1. e4 e5 2. Qh5 Nc6 3.";
            the number before the last "." is taken as the current move number.
        output (str): The model's answer.
        target (str): The mating move, e.g. "Qxf7#".

    Returns:
        bool: True if the target move (or the same move written without the
        capture marker "x") appears in the output before any continuation
        starting at the next move number.

    Raises:
        IndexError, ValueError: If no move number can be parsed from ``input``.
    """
    output = output.strip()
    if not output:
        return False
    if output.endswith("#"):
        # The answer ends with a mating move: keep only that last token.
        output = output.split(" ")[-1].strip()

    # Based on the input, determine the number of the last move
    move_idx = input.split(".")[-2].split(" ")[-1].strip()
    next_move_idx = str(int(move_idx) + 1)

    # Cut off anything from the next numbered move onwards. The number must be
    # a stand-alone "N." marker. Matching the bare digit would also hit square
    # names (e.g. the "5" in "Qh5#") and truncate the answer itself.
    output = re.split(rf"\b{next_move_idx}\.", output)[0].strip()

    accepted_moves = [target]
    if len(target) > 1 and target[1] == "x":
        # Also accept the move written without the capture marker.
        accepted_moves.append(target[0] + target[2:])
    return any(move in output for move in accepted_moves)
149
+
150
+
151
def eval_equation_balancer(input: str, output: str, target: str) -> bool:
    """
    Evaluate if the output is a valid equation balancer.

    The left-hand side of the output must contain the same characters as the
    left-hand side of the target once "+", "-", "*", "/" and spaces are
    removed, and it must evaluate to the target's right-hand side (within 1e-6).
    ``input`` is not used.
    """
    candidate_lhs = output.split("=")[0].strip()
    target_sides = target.split("=")
    expected_value_txt = target_sides[1].strip()
    target_lhs = target_sides[0].strip()

    # Same operands in the same order, ignoring which operators were chosen.
    drop_operators = str.maketrans("", "", "+-*/ ")
    if candidate_lhs.translate(drop_operators).strip() != target_lhs.translate(drop_operators).strip():
        return False

    # NOTE(review): eval() runs on text derived from model output. It is only
    # reached when that text matches the target's characters plus operators.
    try:
        return abs(eval(candidate_lhs) - eval(expected_value_txt)) < 1e-6
    except Exception:
        return False
171
+
172
+
173
def eval_for_multiple_choice(input_text: str, final_answer: str, target: str) -> bool:
    """
    Evaluates if the final answer matches the target using pattern matching.

    Accepted forms of a correct answer, checked in this order: identical to the
    target; the bare option letter ("A", "A."); a phrase ending in the letter
    ("Answer is A", "Answer is A."); a phrase containing the text of the target
    option as listed in ``input_text``; or, for digit targets, any answer
    containing that digit.

    Args:
        input_text (str): The original question text including options
        final_answer (str): The model's answer
        target (str): The correct answer

    Returns:
        bool: True if answer is correct, False otherwise
    """
    # Handle empty or None inputs
    if not final_answer or not target:
        return False

    def clean_text(text: str) -> str:
        """Lower-case, trim, and drop backticks and parentheses."""
        if not text:
            return ""
        return text.lower().strip().replace('`', '').replace('(', '').replace(')', '')

    def extract_option_text(input_text: str, option_letter: str) -> str:
        """Return the (lower-cased) text of the given option, or '' if not found."""
        if not input_text:
            return ''
        lowered = input_text.lower()

        # Use the first header that is present, even if nothing follows it.
        options_section = ""
        for header in ('options:', 'choices:'):
            if header in lowered:
                options_section = lowered.split(header)[1].strip()
                break

        paren_prefixes = (f'({option_letter})', f'{option_letter})')
        if not options_section:
            # No header: look for "(A) text" / "A) text" lines anywhere in the question.
            for line in lowered.split('\n'):
                if line.strip().startswith(paren_prefixes):
                    return line.split(')', 1)[1].strip()

        for line in options_section.split('\n'):
            line = line.strip()
            if line.startswith(paren_prefixes):
                return line.split(')', 1)[1].strip()
            # Handle options like "A. text" format
            if line.startswith(f'{option_letter}.'):
                return line.split('.', 1)[1].strip()
        return ''

    # Full option match (A), (B), etc. (e.g., (A) == (A))
    if final_answer == target:
        return True

    # Clean and normalize inputs
    clean_answer = clean_text(final_answer)
    clean_target = clean_text(target)

    # Handle target formats: (A), A), A, etc. clean_text has already removed
    # the parentheses, so those all reduce to the single-character case.
    target_letter = ""
    if len(clean_target) == 1:
        target_letter = clean_target
    elif clean_target and clean_target[-1] in 'abcd':
        target_letter = clean_target[-1]

    # Without a target letter, every check below would compare against "" and
    # accept unrelated answers (e.g. any one-character answer).
    if not target_letter:
        return False

    # Handle answer formats like "A" or "A."
    if clean_answer in (target_letter, f"{target_letter}."):
        return True

    # Handle answer formats like "Option A" or "Answer is A" (with or without a final period)
    if clean_answer.endswith((f" {target_letter}", f" {target_letter}.")):
        return True

    # Text content match - check if the target option text is in the answer
    target_text = extract_option_text(input_text, target_letter)
    if target_text and target_text in clean_answer:
        return True

    # Handle numerical answers (if target is a number and answer contains that number)
    if target_letter.isdigit() and target_letter in clean_answer:
        return True

    return False
267
+
268
+
269
def eval_for_pyton_programming_puzzles(input: str, output: str) -> bool:
    """
    Evaluate if the output is a valid Python programming puzzle solution.

    The candidate code is combined with a small driver that calls ``solution()``
    and prints ``sat(answer)``. The puzzle counts as solved when the captured
    execution output contains "True".

    Args:
        input (str): The puzzle source; prepended when the output does not
            define ``sat`` itself.
        output (str): The model's answer, optionally wrapped in a ```python fence.
    """
    # Keep only the body of the last ```python fenced block, if any.
    if "```python" in output:
        output = output.split("```python")[-1].strip()
        output = output.split("```")[0].strip()

    driver = "answer = solution()\nprint(sat(answer))"
    if "def sat" not in output:
        # The answer only provides solution(); sat() comes from the puzzle text.
        code = f"from typing import *\n{input}\n{output}\n{driver}"
    else:
        if "from typing" not in output:
            output = f"from typing import *\n{output}"
        code = f"{output}\n{driver}"

    code = code.replace("List[", "list[")
    # presumably returns the captured stdout/stderr as a string — verify against execute_code
    eval_bool = execute_code_with_timeout(code, timeout=3)

    # Debug aid: this error means solution() never produced a value.
    if "NameError: name 'answer' is not defined" in eval_bool:
        print(f"Eval bool: {eval_bool}")
        print(f"Code:\n{code}")
        print("*" * 100)
    return "True" in eval_bool
dynamic_cheatsheet/utils/execute_code.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides functions to extract and execute Python code from a string.
3
+
4
+ The functions are:
5
+ * extract_and_run_python_code(txt: str) -> str: Extracts and executes Python code from a string.
6
+ * execute_code_with_timeout(code: str, timeout: int = 3) -> str: Executes Python code with a timeout and returns the output.
7
+
8
+ Additional functions can be added as needed.
9
+ """
10
+
11
+ import os
12
+ import tempfile
13
+ from subprocess import Popen, PIPE, TimeoutExpired
14
+
15
def extract_and_run_python_code(txt: str) -> str:
    """
    Extract and execute Python code from a provided string.

    The first ```python fenced block is extracted. If its last line is a bare
    expression that is not already printed, that line is wrapped in
    ``print(...)`` so its value shows up in the output. The code is then run
    through ``execute_code_with_timeout``.

    Parameters:
        txt (str): Input string containing a possible Python code block.

    Returns:
        str: Execution result or error message wrapped in output formatting.
        Returns None when ``txt`` contains no ```python block.
    """
    import ast  # local import: only needed for the trailing-expression check

    def extract_code(input_str: str) -> str:
        """Extract Python code block delimited by ```python and ```."""
        try:
            return input_str.split("```python", 1)[1].split("```", 1)[0].strip()
        except IndexError:
            raise ValueError("No valid Python code block found.")

    def is_expression(source: str) -> bool:
        """Return True if `source` parses on its own as a single expression."""
        try:
            ast.parse(source, mode="eval")
        except (SyntaxError, ValueError):
            return False
        return True

    def ensure_print_statement(code: str) -> str:
        """
        Wrap the last line in print(...) when it is a standalone expression.

        Lines that already start with print(, a comment, or indentation, or
        that contain "return", are left alone (as before). Lines that are not
        expressions (assignments, a closing bracket of a multi-line call,
        imports, ...) are now also left alone, because wrapping them would
        turn valid code into an error.
        """
        lines = code.splitlines()
        if not lines:
            # Empty code block: nothing to wrap.
            return code
        last_line = lines[-1].rstrip()
        already_handled = (
            last_line.startswith(("print(", "#", " ", "\t")) or "return" in last_line
        )
        if not already_handled and is_expression(last_line):
            lines[-1] = f"print({last_line})"
        return "\n".join(lines)

    if "```python" not in txt:
        return None  # Return early if no Python code block is present

    try:
        # Extract and sanitize the code
        code_block = extract_code(txt)
        code_with_print = ensure_print_statement(code_block)

        # Execute the code and return output
        python_output = execute_code_with_timeout(code_with_print)
        return f"Output of the Python code above:\n```\n{python_output}\n```"

    except Exception as error:
        # Best effort: report any failure as text instead of raising.
        return f"PYTHON CODE OUTPUT:\n```\nError: {str(error)}\n```"
60
+
61
+
62
+ # Python code execution function with timeout
63
+ # TODO (msuzgun): Improve the security of this function by using a sandboxed environment
64
def execute_code_with_timeout(code: str, timeout: int = 3) -> str:
    """
    Execute Python code with a timeout and return the output.

    The code is written to a temporary ``.py`` file and run with ``python3``
    in a child process. The temporary file is always removed afterwards.

    Parameters:
        code (str): Python code to execute.
        timeout (int): Timeout duration in seconds.

    Returns:
        str: Captured output or error message from the code execution.
            * the stripped stdout, if there is any;
            * otherwise "Error in execution: <stderr>" if stderr is non-empty;
            * otherwise a hint that no output was generated;
            * "Execution took too long, aborting..." on timeout.
    """
    # Python reads source files as UTF-8 by default, so write the script as
    # UTF-8 regardless of the locale. Close the file before running it.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".py", delete=False, encoding="utf-8"
    ) as temp_file:
        temp_file.write(code)
        script_path = temp_file.name

    try:
        # In case alias python=python3 is not set, use python3 instead of python
        process = Popen(["python3", script_path], stdout=PIPE, stderr=PIPE)
        try:
            stdout, stderr = process.communicate(timeout=timeout)
        except TimeoutExpired:
            process.kill()
            # Reap the killed child and close its pipes; without this the
            # process lingers as a zombie and the pipe handles leak.
            process.communicate()
            return "Execution took too long, aborting..."
    finally:
        os.remove(script_path)

    # errors="replace" keeps undecodable child output from raising here.
    captured_output = stdout.decode(errors="replace").strip()
    error_output = stderr.decode(errors="replace").strip()

    if captured_output == "":
        if error_output != "":
            captured_output = f"Error in execution: {error_output}"
        else:
            captured_output = "(No output was generated. It is possible that you did not include a print statement in your code. If you want to see the output, please include a print statement.)"

    return captured_output
dynamic_cheatsheet/utils/extractor.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This file contains the functions to extract the final answer, cheatsheet and solution evaluation from model responses.
3
+
4
+ The functions are:
5
+ * extract_answer(response: str) -> str: Extracts the final answer from the model response.
6
+ * extract_cheatsheet(response: str, old_cheatsheet: str) -> str: Extracts the cheatsheet from the model response.
7
+ * extract_solution(response: str, header: str = "SOLUTION EVALUATION:", error_message : str = "No solution evaluation found") -> str: Extracts the solution evaluation from the model response.
8
+
9
+ Additional functions can be added as needed.
10
+ """
11
+
12
def extract_answer(
    response: str,
) -> str:
    """
    Extracts the final answer from the model response.

    Two formats are recognised, in this order:
      1. ``<answer> ... </answer>`` tags (the last ``<answer>`` wins).
      2. A ``FINAL ANSWER`` marker (the last one wins) followed by a block
         fenced with ``'''`` or triple backticks; whichever fence appears
         first after the marker is used.

    Arguments:
        response : str : The response from the model.

    Returns:
        str : The extracted final answer (if not found, returns "No final answer found").
    """
    not_found = "No final answer found"

    if "<answer>" in response:
        # <answer> (content) </answer>
        answer = response.split("<answer>")[-1].strip()
        return answer.split("</answer>")[0].strip()

    if "FINAL ANSWER" not in response:
        return not_found

    tail = response.split("FINAL ANSWER")[-1].strip()
    if tail.startswith(":"):
        tail = tail[1:].strip()

    # Decide whether to split by "```" or "'''" based on which appears first.
    quote_idx = tail.find("'''")
    backtick_idx = tail.find("```")
    if quote_idx == -1 and backtick_idx == -1:
        # A marker without any fenced block is treated as "no answer".
        return not_found
    if backtick_idx == -1 or (quote_idx != -1 and quote_idx < backtick_idx):
        fence = "'''"
    else:
        fence = "```"
    answer = tail.split(fence)[1].strip()

    # Special case for P3-Test task: If the first line contains "python" then remove it
    answer_lines = answer.split("\n")
    if answer_lines[0].strip().lower() == "python":
        answer = "\n".join(answer_lines[1:]).strip()
    return answer
60
+
61
+
62
def extract_cheatsheet(
    response: str,
    old_cheatsheet: str,
) -> str:
    """
    Extracts the cheatsheet from the model response.

    The text between the first ``<cheatsheet>`` tag and the next
    ``</cheatsheet>`` (or the next ``<cheatsheet>``, whichever bounds it
    first) is returned with surrounding whitespace removed.

    Arguments:
        response : str : The response from the model.
        old_cheatsheet : str : The old cheatsheet to return if the new one is not found.

    Returns:
        str : The extracted cheatsheet (if not found, returns the old cheatsheet).
    """
    response = response.strip()
    # <cheatsheet> (content) </cheatsheet>
    if "<cheatsheet>" not in response:
        return old_cheatsheet

    # The tag is known to be present, so index 1 always exists.
    after_open_tag = response.split("<cheatsheet>")[1].strip()
    return after_open_tag.split("</cheatsheet>")[0].strip()
87
+
88
+
89
def extract_solution(
    response: str,
    header: str = "SOLUTION EVALUATION:",
    error_message : str = "No solution evaluation found",
) -> str:
    """
    Extracts the solution evaluation from the model response.

    Arguments:
        response : str : The response from the model.
        header : str : The header to search for the solution evaluation.
        error_message : str : Currently unused; kept for interface compatibility.

    Returns:
        str : The text inside the first ''' ... ''' block after the header.
              If the header is present but no ''' follows, the stripped text
              after the header. If the header is missing (or empty), the
              stripped response itself is returned (not `error_message`).
    """
    response = response.strip()

    # An empty header cannot be searched for; fall back to the whole response.
    if not header or header not in response:
        return response

    after_header = response.split(header)[1]
    quoted = after_header.split("'''")
    if len(quoted) < 2:
        return after_header.strip()
    return quoted[1].strip()
dynamic_cheatsheet/utils/sonnet_eval.py ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Defines function sonnet_errors(poem, target: str) -> Dict[str, Any]
3
+ which takes a target rhyme scheme (and optionally a list of required words) and returns a dict of errors
4
+
5
+ Returns an empty dictionary if there are no errors, so bool(sonnet_errors(poem, target)) is False if there are no
6
+ errors. It's a permissive check for sonnets errors, meaning that if it is unsure then it doesn't return an error.
7
+
8
+ Specifically,
9
+
10
+ * Check if it adheres to a given rhyming scheme
11
+ * Check if each line has 10-11 syllables, more precisely, there's some pronunciation of each line with 10-11 syllables
12
+
13
+ This omits a few things like rhymes and iambic pentameter.
14
+
15
+ # Rhymes
16
+
17
+ For rhymes, we use python `pronouncing` library based on:
18
+
19
+ * CMU pronouncing dictionary http://www.speech.cs.cmu.edu/cgi-bin/cmudict
20
+
21
+ # Syllable counting
22
+
23
+ Given that there are multiple ways to pronounce many words (e.g. "caramel" can be pronounced with 2 or 3 syllables),
24
+ we adopt a "permissive" approach and consult multiple tools for syllable counting:
25
+
26
+ * pronouncing - a well-known pronunciation dictionary based on CMU's pronouncing dictionary
27
+ * syllables - a Python library for syllable counting
28
+ * pyphen - a Python wrapper for the hyphenation library
29
+ """
30
+
31
+ from typing import Set, Dict, Any
32
+ import re
33
+ import joblib
34
+ import pyphen
35
+ import syllables
36
+ import pronouncing
37
+
38
+
39
+ ALLOWED_SYLLABLES = {
40
+ 10,
41
+ 11,
42
+ } # about 3-4% of legit lines have 11 syllables, so we allow it, > 99% have 10 or 11
43
+ NUM_REQUIRED_WORDS = 3
44
+
45
+ memory = joblib.Memory(
46
+ ".cache", verbose=0
47
+ ) # use cache to speed up repeated rhyme/syllable calls
48
+
49
+
50
def sonnet_errors(poem: str, target: str, verbose=False) -> Dict[str, Any]:
    """
    Checks for sonnet errors with respect to target rhyme scheme (and optional required words)

    args:
        poem: the poem to check
        target: the rhyme scheme, e.g. "ABBA ABBA CDC DCD"
            optionally target can have a list of required words, like
            "ABBA ABBA CDC DCD, love train snail" each of these must be in the poem
            (the words may be separated by spaces and/or commas)
        verbose: if True, print out more details

    returns:
        A dict of errors, empty if none were found. On top of the entries
        produced by scheme_errors it may contain "missing words" (list of
        required words not found in the poem) and "syllable errors" (list of
        (line, sorted syllable counts) for lines with no allowed count).
    """
    if ", " in target:
        # Split only on the first ", " so that a comma-separated word list
        # ("..., love, train") no longer raises on unpacking.
        scheme, rest = target.split(", ", 1)
        required_words = rest.replace(",", " ").split()
    else:
        scheme = target
        required_words = []

    errors = scheme_errors(poem, scheme, verbose=verbose)
    assert isinstance(errors, dict)

    # Case-insensitive substring check of each required word.
    poem_lower = poem.lower()
    missing_words = [w for w in required_words if w.lower() not in poem_lower]
    if missing_words:
        errors["missing words"] = missing_words

    syllable_errors = []
    for line in split_poem(poem):
        variations = syllable_variations(line)
        # A line is fine if any possible reading has an allowed syllable count.
        if not (variations & ALLOWED_SYLLABLES):
            syllable_errors.append((line, sorted(variations)))
    if syllable_errors:
        errors["syllable errors"] = syllable_errors

    return errors
83
+
84
+
85
def clean_word(text: str):
    """Lower-case `text` and strip common punctuation and spaces from both ends."""
    edge_chars = ",.!?;: \"'[]()/"
    lowered = text.lower()
    return lowered.strip(edge_chars)
87
+
88
+
89
def clean_line(line: str):
    """
    Clean a line from a poem.

    A trailing single-letter rhyme label in parentheses, such as (A) or (b),
    is removed together with the whitespace around it; the result is then
    stripped of leading and trailing whitespace.
    """
    without_label = re.sub(r"\s*\([A-Za-z]\)\s*$", "", line)
    return without_label.strip()
96
+
97
+
98
def split_poem(poem: str, min_line_len=3):
    """
    Split a poem into cleaned lines.

    Each line is passed through clean_line; only lines strictly longer than
    `min_line_len` characters after cleaning are kept, in original order.
    """
    kept = []
    for raw_line in poem.splitlines():
        cleaned = clean_line(raw_line)
        if len(cleaned) > min_line_len:
            kept.append(cleaned)
    return kept
101
+
102
+
103
@memory.cache
def slant_rhyming_parts(word: str):
    """
    Return the set of consonant "skeletons" of the rhyming part of `word`.

    For every pronunciation of the word, each phoneme of its rhyming part is
    mapped to "R" if it contains an R, to itself if it is one of the
    single-letter consonant symbols, and to the placeholder "?" otherwise.
    Skeletons made only of placeholders are dropped. In the rest, the
    placeholders are removed, except that a single trailing "?" is kept when
    the skeleton ended with one.
    """
    consonants = set("BCDFGHJKLMNPQRSTVWXYZ")

    def encode(phoneme):
        if "R" in phoneme:
            return "R"
        if phoneme in consonants:
            return phoneme
        return "?"

    skeletons = set()
    for phones in pronouncing.phones_for_word(word):
        rhyming_phonemes = pronouncing.rhyming_part(phones).split()
        skeleton = "".join(encode(phoneme) for phoneme in rhyming_phonemes)
        if all(symbol == "?" for symbol in skeleton):
            continue  # nothing but placeholders (or empty): no information
        trailing = "?" if skeleton.endswith("?") else ""
        skeletons.add(skeleton.replace("?", "") + trailing)
    return skeletons
116
+
117
+
118
@memory.cache
def get_rhymes(w):
    """Return the rhymes of `w` reported by `pronouncing.rhymes`, as a set."""
    rhyming_words = pronouncing.rhymes(w)
    return set(rhyming_words)
121
+
122
+
123
def scheme_errors(poem: str, scheme: str, verbose=False):
    """
    Find errors with respect to a given rhyming scheme.

    args:
        poem: the poem text
        scheme: the rhyme scheme, one letter per line; spaces are ignored
        verbose: if True, print the rhyme groups and per-word comparisons

    returns:
        {"line count": message} if the number of poem lines differs from the
        scheme length. Otherwise a dict mapping each offending last word to
        {"reason", "internal", "external"}, where internal/external list the
        perfect and slant rhymes found inside and outside the word's group.
        The dict is empty when no errors are found.
    """
    lines = split_poem(poem)
    scheme = scheme.replace(" ", "")

    if len(lines) != len(scheme):
        return {
            "line count": f"Poem has {len(lines)} != {len(scheme)} lines in pattern {scheme}"
        }

    last_words = []
    for line in lines:
        tokens = line.replace("-", " ").split()
        # A line made only of hyphens (e.g. a "----" separator) has no tokens;
        # use "" so it is ignored below instead of raising IndexError.
        last_words.append(clean_word(tokens[-1]) if tokens else "")

    dictionary = pronouncing.cmudict.dict()  # we ignore words not in dictionary

    # One group of (known) last words per scheme letter.
    groups = [
        [w for w, p in zip(last_words, scheme) if p == label and w in dictionary]
        for label in sorted(set(scheme))
    ]

    slant_sets = {w: set(slant_rhyming_parts(w)) for g in groups for w in g}

    scores = {}

    if verbose:
        print(groups)

    for g in groups:
        internal_words = set(g)
        external_words = {w for h in groups if h is not g for w in h}
        if len(internal_words) == 1:
            continue  # don't check rhymes if only one word in the group is in dictionary
        for w in g:
            rhymes = get_rhymes(w)
            # scores[w] = [matches within own group, matches in other groups]
            scores[w] = []
            for comparisons in [internal_words, external_words]:
                m = dict(rhymes=[], slant_rhymes=[])
                scores[w].append(m)
                for v in comparisons:
                    if v == w:
                        continue
                    if v in rhymes:
                        m["rhymes"].append(v)
                    elif slant_sets[v] & slant_sets[w]:
                        m["slant_rhymes"].append(v)

    error_reasons = {}
    suspicious_reasons = {}

    for w in scores:
        internal, external = scores[w]

        if internal["rhymes"] or internal["slant_rhymes"]:
            pass  # ok if it rhymes (perfect or slant) with at least one other word in the group
        elif len(external["rhymes"]) >= 2:
            error_reasons[w] = "no internal rhymes, 2+ external perfect rhymes"
        elif external["rhymes"]:
            if len(external["slant_rhymes"]) >= 2:
                error_reasons[
                    w
                ] = "no internal rhymes, 1 external perfect rhyme, 2+ external slant rhymes"
            else:
                suspicious_reasons[
                    w
                ] = "no internal rhymes/slant rhymes, 1 external perfect rhymes"
        elif len(external["slant_rhymes"]) >= 3:
            error_reasons[
                w
            ] = "no internal rhymes/slant rhymes, 3+ external slant rhymes"
        if verbose:
            print(w, "internal:", internal, "external:", external)

    # Suspicious words only count as errors when there are enough problems overall.
    if len(error_reasons) + len(suspicious_reasons) >= 3:
        error_reasons.update(suspicious_reasons)

    return {
        w: {
            "reason": error_reasons[w],
            "internal": scores[w][0],
            "external": scores[w][1],
        }
        for w in error_reasons
    }
206
+
207
+
208
def syllable_variations(text, verbose=False) -> Set[int]:
    """
    Return the set of possible total syllable counts for `text`.

    The text is split on spaces and hyphens. Each non-empty cleaned word
    contributes every count between the smallest and largest value reported
    by word_syllables (so {2, 4} is widened to 2, 3, 4), and the totals are
    all sums obtainable by picking one count per word. It is a set because
    some words like "caramel" can be pronounced with different numbers of
    syllables. `verbose` is accepted but not used.
    """
    totals = {0}
    for token in re.split("[ -]+", text):
        cleaned = clean_word(token)
        if cleaned:
            counts = word_syllables(cleaned)
            lowest, highest = min(counts), max(counts)
            totals = {
                running + extra
                for running in totals
                for extra in range(lowest, highest + 1)
            }
    return totals
224
+
225
+
226
@memory.cache
def word_syllables(word: str) -> Set[int]:
    """
    Return the possible syllable counts of an already-cleaned word.

    The word must equal clean_word(word); this is asserted so that the cache
    is only keyed on cleaned words.
    """
    already_clean = word == clean_word(word)
    assert already_clean, "Word should be cleaned before hitting word_syllables cache"
    return SyllableCounters.count_word(word)
232
+
233
+
234
class SyllableCounters:
    """
    Simple class to count syllables in text.

    All methods are static; the class only serves as a namespace and as a
    holder for the lazily created lookup objects below.
    """

    # Lazily loaded CMU pronouncing dictionary (see cmu_dict).
    _cmu_dict = None
    # Lazily created pyphen hyphenator (see pyphen_counter).
    _pyphen_counter = None

    @staticmethod
    def cmu_dict():
        """Return the CMU dictionary, loading it on first use."""
        if SyllableCounters._cmu_dict is None:
            SyllableCounters._cmu_dict = pronouncing.cmudict.dict()
        return SyllableCounters._cmu_dict

    @staticmethod
    def cmu(word):
        """Return the set of syllable counts over all CMU pronunciations of `word`."""
        # Declared static so it works both as SyllableCounters.cmu(word) and
        # on an instance.
        return {
            pronouncing.syllable_count(pro) for pro in pronouncing.phones_for_word(word)
        }

    @staticmethod
    def pyphen_counter():
        """Return the shared English pyphen hyphenator, creating it on first use."""
        if SyllableCounters._pyphen_counter is None:
            SyllableCounters._pyphen_counter = pyphen.Pyphen(lang="en")
        return SyllableCounters._pyphen_counter

    @staticmethod
    def count_word(word) -> Set[int]:
        """
        Return the set of syllable counts for `word` according to the CMU
        dictionary, pyphen hyphenation and the `syllables` estimator.
        An empty word yields {0}.
        """
        if not word:
            return {0}

        cmu = SyllableCounters.cmu(word)

        # Number of hyphenation points + 1.
        pyph = SyllableCounters.pyphen_counter().inserted(word).count("-") + 1

        syll = syllables.estimate(word)

        ans = cmu | {pyph, syll}

        # A zero estimate is dropped when any other estimate exists.
        if 0 in ans and len(ans) > 1:
            ans.remove(0)

        return ans
276
+
277
+
278
+ TESTS = [
279
+ ["In savannah where tall trees kiss the sky,", 10],
280
+ ["A giraffe named Joe with love-stricken grace,", 10],
281
+ ["Did find a turtle named Sarah nearby,", 10],
282
+ ["Their eyes did meet, hearts raced in sweet embrace.", 10],
283
+ ["Though nature's laws deemed their love quite absurd,", 10],
284
+ ["Joe's neck would bend to whisper words of flame,", 10],
285
+ ["And Sarah's shell would tremble at each word,", 10],
286
+ ["In love's bizarre dance, they found no one to blame.", 11],
287
+ ["Through sun and storm, they'd wander, hoof and claw,", 10],
288
+ ["With love that no one ever could unravel,", 11],
289
+ ["In each other's eyes, perfection they saw,", 10],
290
+ ["A love so fierce, no distance could they travel.", 11],
291
+ ["So let us learn from turtle and giraffe,", 10],
292
+ ["That love's own shape can make the coldest laugh.", 10],
293
+ ["In yonder sky where colours blend so high,", 10],
294
+ ["A rainbow arcs, a bridge 'twixt earth and air.", 10],
295
+ ["Its radiant hues draw every gazing eye,", 12],
296
+ ["A painter's dream, a sight beyond compare.", 10],
297
+ ["Yet in the world of man, delight so small,", 10],
298
+ ["As gumball's sphere, with colours bright and clear.", 10],
299
+ ["Such simple joy it brings to one and all,", 10],
300
+ ["Its sweetness matched by colours we hold dear.", 10],
301
+ ["Both nature's arc and candy sphere delight,", 10],
302
+ ["The vast expanse and tiny bite unite,", 10],
303
+ ["In tales of wonder, stories to be told.", 10],
304
+ ["So let us cherish both the grand and small,", 10],
305
+ ["For beauty’s found in rainbow and in gumball.", 11],
306
+ ["When night's embrace hath shrouded all in black,", 10],
307
+ ["A flashlight's beam doth pierce the dark so deep,", 10],
308
+ ["From paths we've chosen, and vows we mean to keep.", 11],
309
+ ["Thou art like that beam, true, clear, and bright,", 9],
310
+ ["Cutting through the fog of my mind's own night,", 10],
311
+ ["Yet oft I find, by folly or by chance,", 10],
312
+ ["Distractions lead my wandering glance.", 9],
313
+ ["But even as stars, obscured by fleeting cloud,", 11],
314
+ ["Return to grace the heavens, proud and loud,", 10],
315
+ ["So shall my focus, once by ails distraught,", 10],
316
+ ["Return to thee, as ever it hath sought.", 10],
317
+ ["For in this world of fleeting sight and sound,", 10],
318
+ ]
319
+
320
+
321
def fixed_tests():
    """
    Run the built-in syllable-count checks and return the list of failures.

    First every (line, expected) pair in TESTS is checked, then a few groups
    of words with ambiguous syllable counts. Each failure is printed and
    recorded as (text, expected_count, computed_variations).
    """
    failures = []

    for line, expected in TESTS:
        variations = syllable_variations(line)
        if expected in variations:
            continue
        print(f"Line `{line}` has {expected} syllables which isn't in {variations}")
        failures.append((line, expected, variations))

    # tests from https://www.mentalfloss.com/article/53661/car-mel-or-car-mel-3-reasons-syllabically-ambiguous-words :
    ambiguous_cases = [
        (
            "fire tire hour liar buyer flower drawer layer loyal royal file orange poem crayon".split(),
            [1, 2],
        ),
        (
            "caramel mayonnaise family chocolate camera different separate favorite realtor".split(),
            [2, 3],
        ),
        ("mischievous".split(), [3, 4]),
    ]
    for words, expected_counts in ambiguous_cases:
        for w in words:
            variations = syllable_variations(w)
            for i in expected_counts:
                if i in variations:
                    continue
                print(
                    f"{w} give syllable_variations {variations} but should include {i}"
                )
                failures.append((w, i, variations))

    return failures
350
+
351
+
352
def summarize_errors(errors, num_samples):
    """
    Print a breakdown of sonnet failures as percentages of `num_samples`.

    args:
        errors: dict mapping each failing poem to its error dict (as returned
            by sonnet_errors)
        num_samples: total number of poems checked, including those without
            errors

    Keys of an error dict that contain no space are treated as rhyme errors;
    the other keys ("line count", "missing words", "syllable errors") all
    contain a space.
    """
    if num_samples == 0:
        # Nothing was checked; avoid dividing by zero below.
        print("Sonnet failure rate: n/a (no samples to summarize)")
        return

    def has_rhyme_error(e):
        return any(" " not in k for k in e)

    def rate(flags):
        return sum(flags) / num_samples

    print(
        f"Sonnet failure rate: {len(errors)/num_samples:.1%} out of {num_samples:,}, breakdown:"
    )
    wnl = rate("line count" in e for e in errors.values())
    print(f"{wnl:.1%} wrong number of lines")
    mw = rate("missing words" in e for e in errors.values())
    print(f"{mw:.1%} missing words")
    bl = rate("syllable errors" in e for e in errors.values())
    print(f"{bl:.1%} poems with at least one line with wrong number of syllables")
    rhyme_errors = rate(has_rhyme_error(e) for e in errors.values())
    both = rate(
        ("syllable errors" in e) and has_rhyme_error(e) for e in errors.values()
    )
    print(
        f"{rhyme_errors:.1%} poems with rhyme errors ({both:.1%} poems with both rhyme and syllable errors)"
    )
375
+
376
+
377
def corpus_check_scheme(corpus_filename, scheme):
    """
    Check every poem in a corpus file against `scheme` and print a report.

    args:
        corpus_filename: path to a text file with poems separated by blank
            lines
        scheme: the target passed to sonnet_errors for every poem

    For each failing poem the verbose diagnostics, the poem and its error
    dict are printed; a summary over all poems is printed at the end.
    """
    with open(corpus_filename, "r", encoding="utf-8") as f:
        chunks = f.read().split("\n\n")
    # Strip first, then filter: whitespace-only chunks (e.g. from trailing
    # newlines) must not be counted as empty, failing poems.
    poems = [p for p in (chunk.strip() for chunk in chunks) if p]

    errors = {}
    for p in poems:
        e = sonnet_errors(p, scheme)
        if e:
            errors[p] = e
            print("*" * 50)
            # Second pass only for its verbose printing.
            sonnet_errors(p, scheme, verbose=True)
            print("scheme", scheme)
            print(p)
            print()
            print(e)
            print("<" * 50)

    summarize_errors(errors, len(poems))
394
+
395
+
396
+ def test():
397
+ assert not sonnet_errors(
398
+ """Not like the brazen giant of Greek fame,
399
+ With conquering limbs astride from land to land;
400
+ Here at our sea-washed, sunset gates shall stand
401
+ A mighty woman with a torch, whose flame
402
+ Is the imprisoned lightning, and her name
403
+ Mother of Exiles. From her beacon-hand
404
+ Glows world-wide welcome; her mild eyes command
405
+ The air-bridged harbor that twin cities frame.
406
+
407
+ "Keep, ancient lands, your storied pomp!" cries she
408
+ With silent lips. "Give me your tired, your poor,
409
+ Your huddled masses yearning to breathe free,
410
+ The wretched refuse of your teeming shore.
411
+ Send these, the homeless, tempest-tost to me,
412
+ I lift my lamp beside the golden door!"
413
+ """,
414
+ "ABBA ABBA CDCDCD",
415
+ )
416
+
417
+ assert not sonnet_errors(
418
+ """How do I love thee? Let me count the ways.
419
+ I love thee to the depth and breadth and height
420
+ My soul can reach, when feeling out of sight
421
+ For the ends of being and ideal grace.
422
+ I love thee to the level of every day’s
423
+ Most quiet need, by sun and candle-light.
424
+ I love thee freely, as men strive for right.
425
+ I love thee purely, as they turn from praise.
426
+ I love thee with the passion put to use
427
+ In my old griefs, and with my childhood’s faith.
428
+ I love thee with a love I seemed to lose
429
+ With my lost saints. I love thee with the breath,
430
+ Smiles, tears, of all my life; and, if God choose,
431
+ I shall but love thee better after death.""",
432
+ "abba abba cdcdcd",
433
+ )
434
+
435
+ assert not sonnet_errors(
436
+ """When, in disgrace with fortune and men’s eyes,
437
+ I all alone beweep my outcast state,
438
+ And trouble deaf heaven with my bootless cries,
439
+ And look upon myself, and curse my fate,
440
+ Wishing me like to one more rich in hope,
441
+ Featur’d like him, like him with friends possess’d,
442
+ Desiring this man’s art and that man’s scope,
443
+ With what I most enjoy contented least;
444
+ Yet in these thoughts myself almost despising,
445
+ Haply I think on thee, and then my state,
446
+ Like to the lark at break of day arising
447
+ From sullen earth, sings hymns at heaven’s gate;
448
+ For thy sweet love remember’d such wealth brings
449
+ That then I scorn to change my state with kings.""",
450
+ "ABAB CDCD EFEF GG",
451
+ )
452
+
453
+ assert sonnet_errors(
454
+ """How do I love thee? Let me count the ways.
455
+ I love thee to the depth and breadth and height
456
+ My soul can reach, when feeling out of sight
457
+ For the ends of being and ideal grace.
458
+ I love thee to the level of every day’s
459
+ Most quiet need, by sun and candle-light.
460
+ I love thee freely, as men strive for right.
461
+ I love thee purely, as they turn from praise.
462
+ I love thee with the passion put to use
463
+ In my old griefs, and with my childhood’s faith.
464
+ I love thee with a love I seemed to lose
465
+ With my lost saints. I love thee with the breath,
466
+ Smiles, tears, of all my life; and, if God choose,
467
+ I shall but love thee better after death.""",
468
+ "ABAB CDCD EFEF GG",
469
+ )
470
+
471
+ aaa = sonnet_errors(
472
+ """How do I love thee? Let me count the ways.
473
+ I love thee to the depth and breadth and height
474
+ My soul can reach, when feeling out of sight
475
+ For the ends of being and ideal grace.
476
+ I love thee to the level of every day’s
477
+ Most quiet need, by sun and candle-light.
478
+ I love thee freely, as men strive for right.
479
+ I love thee purely, as they turn from praise.
480
+ I love thee with the passion put to use
481
+ In my old griefs, and with my childhood’s faith.
482
+ I love thee with a love I seemed to lose
483
+ With my lost saints. I love thee with the breath,
484
+ Smiles, tears, of all my life; and, if God choose,
485
+ I shall but love thee better after death.""",
486
+ "ABBA ABBA CDC DCD",
487
+ # abba abba cdc dcd: (correct)
488
+ # "ABAB CDCD EFEF GG", (false)
489
+ )
490
+
491
+ print(aaa)
492
+
493
+ aaa = sonnet_errors(
494
+ """How do I love thee? Let me count the ways (A)
495
+ I love thee to the depth and breadth and height (B)
496
+ My soul can reach, when feeling out of sight (B)
497
+ For the ends of being and ideal grace (A)
498
+ I love thee to the level of every day’s (A)
499
+ Most quiet need, by sun and candle-light (B)
500
+ I love thee freely, as men strive for right (B)
501
+ I love thee purely, as they turn from praise (A)
502
+ I love thee with the passion put to use (C)
503
+ In my old griefs, and with my childhood’s faith (D)
504
+ I love thee with a love I seemed to lose (C)
505
+ With my lost saints. I love thee with the breath (D)
506
+ Smiles, tears, of all my life; and, if God choose (C)
507
+ I shall but love thee better after death (D).""",
508
+ "ABBA ABBA CDC DCD",
509
+ # abba abba cdc dcd: (correct)
510
+ # "ABAB CDCD EFEF GG", (false)
511
+ )
prompts/curator_prompt_for_dc_cumulative.txt ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CHEATSHEET REFERENCE CURATOR
2
+
3
+ #### 1. Purpose and Goals
4
+ As the Cheatsheet Curator, you are tasked with creating a continuously evolving reference designed to help solve a wide variety of tasks, including algorithmic challenges, debugging, creative writing, and more. The cheatsheet's purpose is to consolidate verified solutions, reusable strategies, and critical insights into a single, well-structured resource.
5
+
6
+ - The cheatsheet should include quick, accurate, reliable, and practical solutions to a range of technical and creative challenges.
7
+ - After seeing each input, you should improve the content of the cheatsheet, synthesizing lessons, insights, tricks, and errors learned from past problems and adapting to new challenges.
8
+
9
+ ---
10
+
11
+ #### 2. Core Responsibilities
12
+ As the Cheatsheet Curator, you should:
13
+ - Curate and preserve knowledge: Select and document only the most relevant, most useful, and most actionable solutions and strategies, while preserving old content of the cheatsheet.
14
+ - Maintain accuracy: Ensure that all entries in the cheatsheet are accurate, clear, and well-contextualized.
15
+ - Refine and update content: Continuously update and improve the content of the cheatsheet by incorporating new insights and solutions, removing repetitions or trivial information, and adding efficient solutions.
16
+ - Ensure practicality and comprehensiveness: Provide critical and informative examples, as well as efficient code snippets and actionable guidelines.
17
+
18
+ Before updating the cheatsheet, however, you should first assess the correctness of the provided solution and strategically incorporate code blocks, insights, and solutions into the new cheatsheet. Always aim to preserve and keep correct, useful, and illustrative solutions and strategies for future cheatsheets.
19
+
20
+ ---
21
+
22
+ #### 3. Principles and Best Practices
23
+ 1. Accuracy and Relevance:
24
+ - Only include solutions and strategies that have been tested and proven effective.
25
+ - Clearly state any assumptions, limitations, or dependencies (e.g., specific Python libraries or solution hacks).
26
+ - For computational problems, encourage Python usage for more accurate calculations.
27
+
28
+ 2. Iterative Refinement:
29
+ - Continuously improve the cheatsheet by synthesizing both old and new solutions, refining explanations, and removing redundancies.
30
+ - Rather than deleting old content and writing new content each time, consider ways to maintain table content and synthesize information from multiple solutions.
31
+ - After solving a new problem, document any reusable codes, algorithms, strategies, edge cases, or optimization techniques.
32
+
33
+ 3. Clarity and Usability:
34
+ - Write concise, actionable, well-structured entries.
35
+ - Focus on key insights or strategies that make solutions correct and effective.
36
+
37
+ 4. Reusability:
38
+ - Provide clear solutions, pseudocodes, and meta strategies that are easily adaptable to different contexts.
39
+ - Avoid trivial content; focus on non-obvious, critical solution details and approaches.
40
+ - Make sure to add as many examples as you can in the cheatsheet.
41
+ - Any useful, efficient, generalizable, and illustrative solutions to the previous problems should be included in the cheatsheet.
42
+
43
+ ---
44
+
45
+ #### 4. Cheatsheet Structure
46
+ The cheatsheet can be divided into the following sections:
47
+
48
+ 1. Solutions, Implementation Patterns, and Code Snippets:
49
+ - Document reusable code snippets, algorithms, and solution templates.
50
+ - Include descriptions, annotated examples, and potential pitfalls, albeit succinctly.
51
+
52
+ 2. [OPTIONAL] Edge Cases and Validation Traps:
53
+ - Catalog scenarios that commonly cause errors or unexpected behavior.
54
+ - Provide checks, validations, or alternative approaches to handle them.
55
+
56
+ 3. General Meta-Reasoning Strategies:
57
+ - Describe high-level problem-solving frameworks and heuristics (e.g., use Python to solve heuristic problems; in bipartite graphs, max matching = min vertex cover, etc.)
58
+ - Provide concrete yet succinct step-by-step guides for tackling complex problems.
59
+
60
+ 4. Implement a Usage Counter
61
+ - Each entry must include a usage count: Increase the count every time a strategy is successfully used in problem-solving.
62
+ - Use the count to prioritize frequently used solutions over rarely applied ones.
63
+
64
+ ---
65
+
66
+ #### 5. Formatting Guidelines
67
+ Use the following structure for each memory item:
68
+
69
+ ```
70
+ <memory_item>
71
+ <description>
72
+ [Briefly describe the problem context, purpose, and key aspects of the solution.] (Reference: Q1, Q2, Q6, etc.)
73
+ </description>
74
+ <example>
75
+ [Provide a well-documented code snippet, worked-out solution, or efficient strategy.]
76
+ </example>
77
+ </memory_item>
78
+ ** Count: [Number of times this strategy has been used to solve a problem.]
79
+
80
+
81
+ <memory_item>
82
+ [...]
83
+ </memory_item>
84
+
85
+ [...]
86
+
87
+ <memory_item>
88
+ [...]
89
+ </memory_item>
90
+
91
+ ```
92
+
93
+ - Tagging: Use references like `(Q14)` or `(Q22)` to link entries to their originating contexts.
94
+ - Grouping: Organize entries into logical sections and subsections.
95
+ - Prioritizing: incorporate efficient algorithmic solutions, tricks, and strategies into the cheatsheet.
96
+ - Diversity: Have as many useful and relevant memory items as possible to guide the model to tackle future questions.
97
+
98
+ N.B. Keep in mind that once the cheatsheet is updated, any previous content not directly included will be lost and cannot be retrieved. Therefore, make sure to explicitly copy any (or all) relevant information from the previous cheatsheet to the new cheatsheet!!!
99
+
100
+ ---
101
+
102
+ #### 6. Cheatsheet Template
103
+ Use the following format for creating and updating the cheatsheet:
104
+
105
+ NEW CHEATSHEET:
106
+ ```
107
+ <cheatsheet>
108
+
109
+ Version: [Version Number]
110
+
111
+ SOLUTIONS, IMPLEMENTATION PATTERNS, AND CODE SNIPPETS
112
+ <memory_item>
113
+ [...]
114
+ </memory_item>
115
+
116
+ <memory_item>
117
+ [...]
118
+ </memory_item>
119
+
120
+ GENERAL META-REASONING STRATEGIES
121
+ <memory_item>
122
+ [...]
123
+ </memory_item>
124
+
125
+ </cheatsheet>
126
+ ```
127
+
128
+ N.B. Make sure that all information related to the cheatsheet is wrapped inside the <cheatsheet> block. The cheatsheet can be as long as circa 2000-2500 words.
129
+
130
+ -----
131
+ -----
132
+
133
+ ## PREVIOUS CHEATSHEET
134
+
135
+ [[PREVIOUS_CHEATSHEET]]
136
+
137
+ -----
138
+ -----
139
+
140
+ ## CURRENT INPUT
141
+
142
+ [[QUESTION]]
143
+
144
+ -----
145
+ -----
146
+
147
+ ## MODEL ANSWER TO THE CURRENT INPUT
148
+
149
+ [[MODEL_ANSWER]]
prompts/generator_prompt.txt ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GENERATOR (PROBLEM SOLVER)
2
+
3
+ Instruction: You are an expert problem-solving assistant tasked with analyzing and solving various questions using a combination of your expertise and provided reference materials. Each task will include:
4
+ 1. A specific question or problem to solve
5
+ 2. A cheatsheet containing relevant strategies, patterns, and examples from similar problems
6
+
7
+ ---
8
+
9
+ ## 1. ANALYSIS & STRATEGY
10
+
11
+ - Carefully analyze both the question and cheatsheet before starting
12
+ - Search for and identify any applicable patterns, strategies, or examples within the cheatsheet
13
+ - Create a structured approach to solving the problem at hand
14
+ - Review and document any limitations in the provided reference materials
15
+
16
+ ## 2. SOLUTION DEVELOPMENT
17
+
18
+ - Present your solution using clear, logical steps that others can follow and review
19
+ - Explain your reasoning and methodology before presenting final conclusions
20
+ - Provide detailed explanations for each step of the process
21
+ - Check and verify all assumptions and intermediate calculations
22
+
23
+ ## 3. PROGRAMMING TASKS
24
+
25
+ When coding is required:
26
+ - Write clean, efficient Python code
27
+ - Follow the strict code formatting and execution protocol (always use the Python code formatting block; furthermore, after the code block, always explicitly request execution by appending: "EXECUTE CODE!"):
28
+ ```python
29
+ # Your code here
30
+ ```
31
+ EXECUTE CODE!
32
+
33
+ - All required imports and dependencies should be clearly declared at the top of your code
34
+ - Include clear inline comments to explain any complex programming logic
35
+ - Perform result validation after executing your code
36
+ - Apply optimization techniques from the cheatsheet when applicable
37
+ - The code should be completely self-contained without external file dependencies--it should be ready to be executed right away
38
+ - Do not include any placeholders, system-specific paths, or hard-coded local paths
39
+ - Feel free to use standard and widely-used pip packages
40
+ - Opt for alternative methods if errors persist during execution
41
+ - Exclude local paths and engine-specific settings (e.g., avoid configurations like chess.engine.SimpleEngine.popen_uci("/usr/bin/stockfish"))
42
+
43
+ ## 4. FINAL ANSWER FORMAT
44
+
45
+ ALWAYS present your final answer in the following format:
46
+
47
+ FINAL ANSWER:
48
+ <answer>
49
+ (final answer)
50
+ </answer>
51
+
52
+ N.B. Make sure that the final answer is properly wrapped inside the <answer> block.
53
+
54
+ * For multiple-choice questions: Only provide the letter choice (e.g., (A))
55
+ * For numerical answers: Only provide the final number (e.g., 42)
56
+ * For other types of answers, including free-response answers: Provide the complete final answer
57
+
58
+ Example:
59
+ Q: What is the meaning of life?
60
+ A: [...]
61
+ FINAL ANSWER:
62
+ <answer>
63
+ 42
64
+ </answer>
65
+
66
+ -----
67
+
68
+ CHEATSHEET:
69
+ '''
70
+ [[CHEATSHEET]]
71
+ '''
72
+
73
+ -----
74
+ -----
75
+
76
+ Now it is time to solve the following question.
77
+
78
+ CURRENT INPUT:
79
+ '''
80
+ [[QUESTION]]
81
+ '''
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ litellm
3
+ numpy
4
+ scikit-learn
5
+ tiktoken
6
+ # openai  # litellm handles OpenAI-compatible endpoints; the OpenAI SDK itself might not be needed by the app
7
+