John-Jiang's picture
init commit
5301c48
# Complete prompts that need no additional template text.
#
# Each value is a self-contained template written with Jinja-style markers:
# ``{{name}}`` is a substitution point and ``{% if name %} ... {% endif %}``
# wraps a section that only applies when that variable is supplied.
# NOTE(review): presumably rendered with Jinja2 -- confirm against the caller.
COMPLETE_PROMPTS = {
    "data_gen": (
        # Role statement and the user instruction (the only unconditional
        # substitution in this template).
        """
You are a data generation expert. Your primary objective is to create
high-quality synthetic data that strictly adheres to the provided guidelines.
The user has provided specific instructions for data generation.
- Carefully analyze the given instructions.
- Ensure the generated data aligns with the specified requirements.
- Maintain accuracy, coherence, and logical consistency.
user_instruction: {{user_instruction}}
"""
        # Conditional section: reference examples to imitate.
        """{% if good_examples %}
The user has provided high-quality reference examples.
- Identify patterns, structures, and key characteristics from these examples.
- Generate data that maintains a similar style, quality, and relevance.
- Ensure variations while preserving meaningful consistency.
good_examples: {{good_examples}}
{% endif %}
"""
        # Conditional section: examples whose flaws must be avoided.
        """{% if bad_examples %}
The following examples represent poor-quality data.
- Avoid replicating errors, inconsistencies, or undesirable patterns.
- Ensure generated data is free from the flaws present in these examples.
bad_examples: {{bad_examples}}
{% endif %}
"""
        # Conditional section: examples that must not be reproduced.
        """{% if duplicate_examples %}
The user has specified examples that should not be duplicated.
- Ensure the generated data remains unique and does not replicate these examples.
- Introduce meaningful variations while maintaining quality and consistency.
duplicate_examples: {{duplicate_examples}}
{% endif %}
"""
        # Conditional section: topical constraint.
        """{% if topic %}
The generated data should be contextually relevant to the given topic: '{{topic}}'.
- Maintain thematic consistency.
- Ensure factual accuracy where applicable.
{% endif %}
"""
        # Closing instructions, always present.
        """Generate unique and high-quality data points.
- Ensure diversity in the dataset while maintaining coherence.
- Avoid redundant or repetitive entries.
"""
    ),
}
# Partial prompts that need to be combined with user-provided content.
#
# Each entry supplies only the fixed opening ("header") and closing
# ("footer") text; whatever goes between them is not defined here.
PARTIAL_PROMPTS = {
    "data_gen": {
        # Role statement. It has no leading or trailing newline.
        "header": (
            "You are a data generation expert. Your primary objective is to create\n"
            "high-quality synthetic data that strictly adheres to the provided guidelines."
        ),
        # Closing instructions. The text both starts and ends with a newline.
        "footer": (
            "\n"
            "Generate unique and high-quality data points.\n"
            "- Ensure diversity in the dataset while maintaining coherence.\n"
            "- Avoid redundant or repetitive entries.\n"
        ),
    },
}
# QA pair generation prompt.
#
# Placeholders: {num_pairs} (how many pairs to request) and {text} (the
# source passage). The doubled braces around the sample JSON objects are
# escapes that come out as single literal braces under str.format.
# NOTE(review): presumably filled with
# qa_generation.format(num_pairs=..., text=...) -- confirm against the caller.
qa_generation = "\n".join(
    [
        "",  # the prompt opens with a newline
        "Create {num_pairs} question-answer pairs from this text for LLM training.",
        "Rules:",
        "1. Questions must be about important facts in the text",
        "2. Answers must be directly supported by the text",
        "3. Return JSON format only:",
        "[",
        "{{",
        '"question": "Question 1?",',
        '"answer": "Answer 1."',
        "}},",
        "{{",
        '"question": "Question 2?",',
        '"answer": "Answer 2."',
        "}}",
        "]",
        "Text:",
        "{text}",
        "",  # ...and closes with a newline
    ]
)