#!/usr/bin/env python3
# NOTE: web file-viewer residue that preceded the shebang (status banner, "File size: 17,233 Bytes", revision 1f38061, and a line-number gutter running 1-372) was removed; it is not part of the script.
"""
Generate Synthetic Hackathon Participants
This script creates 100 diverse hackathon participants and saves them to the database.
The participants represent various personas that might attend a hackathon focused on
"Connected Experiences" and AI agents.
"""
import random
import sys
import os
from faker import Faker
from hackathon_organizer.database import initialize_database, add_participant, get_participants_dataframe
# Initialize Faker for generating realistic names and emails.
# Single module-level instance; generate_participants() draws every name and
# email from it. No seed is set in the code shown here, so output presumably
# differs from run to run.
fake = Faker()
# Define various participant personas and attributes
#
# Sentence templates for technically oriented participants. Each "{name}"
# placeholder corresponds to a key of STACK_COMPONENTS; generate_background()
# replaces every placeholder that occurs in the chosen template with one
# randomly selected value from that key's list.
TECHNICAL_BACKGROUNDS = [
    # Developers
    "Full-stack developer with {years} years of experience in {stack}. {additional}",
    "Backend engineer specializing in {backend_tech}. {additional}",
    "Frontend developer focused on {frontend_tech}. {additional}",
    "Mobile developer with expertise in {mobile_tech}. {additional}",
    "DevOps engineer with experience in {devops_tech}. {additional}",
    "Data scientist working with {ds_tech}. {additional}",
    "Machine learning engineer focused on {ml_tech}. {additional}",
    "AI researcher specializing in {ai_tech}. {additional}",
    "Game developer using {game_tech}. {additional}",
    "Embedded systems engineer working with {embedded_tech}. {additional}",
    # Technical but not coding-focused
    "UX/UI designer with {years} years of experience. {additional}",
    "Product manager for {product_type} products. {additional}",
    "QA engineer with expertise in {qa_tech}. {additional}",
    "Technical writer specializing in {writing_focus}. {additional}",
    "Solution architect with background in {arch_focus}. {additional}",
]
# Sentence templates for participants without a technical role. Each "{name}"
# placeholder corresponds to a key of NON_TECH_COMPONENTS and is filled in by
# generate_background(is_technical=False). Two templates carry no placeholder
# other than {additional}.
NON_TECHNICAL_BACKGROUNDS = [
    "Marketing professional with experience in {marketing_focus}. {additional}",
    "Business development specialist in the {industry} industry. {additional}",
    "Project manager with {years} years of experience in {pm_focus}. {additional}",
    "Entrepreneur and founder of a {startup_type} startup. {additional}",
    "Student studying {field} at {university}. {additional}",
    "Design thinking facilitator and innovation consultant. {additional}",
    "Content creator focusing on {content_focus}. {additional}",
    "Sales professional in the {sales_industry} sector. {additional}",
    "HR specialist interested in tech talent and culture. {additional}",
    "Non-profit professional looking to leverage technology for social impact. {additional}",
]
# Pool of first-person goal statements. generate_participants() assigns one to
# each participant with random.choice(), independently of whether the
# participant is technical. The inline headings only group the entries for
# readers; the code does not use the grouping.
GOALS = [
    # Learning-focused
    "I want to learn about AI and how it can enhance user experiences at events.",
    "I'm here to understand how to build AI agents and apply them to real-world problems.",
    "I hope to gain practical experience with AI technologies and expand my technical skills.",
    "I want to learn from experienced developers and improve my coding abilities.",
    "I'm looking to understand how AI can create more meaningful human connections.",
    # Project-focused
    "I want to build a prototype that demonstrates the power of AI in connecting people.",
    "I'm hoping to create an innovative solution that addresses the challenges of virtual events.",
    "My goal is to develop an AI agent that enhances real-world social interactions.",
    "I want to build something impressive for my portfolio that showcases my skills.",
    "I'm aiming to create a practical tool that event organizers can actually use.",
    # Networking-focused
    "I'm primarily here to network with other professionals in the AI and event space.",
    "I want to meet potential co-founders for a startup idea I've been developing.",
    "I'm looking to connect with mentors who can guide my career in tech.",
    "I hope to find collaborators for future projects beyond this hackathon.",
    "I want to expand my professional network in the Toronto tech community.",
    # Career-focused
    "I'm exploring career opportunities in AI development and looking to showcase my skills.",
    "I want to transition from my current role to a more tech-focused position.",
    "I'm hoping this experience will help me land a job at an innovative tech company.",
    "I want to demonstrate my abilities to potential employers or clients.",
    "I'm building skills that will help me advance in my current organization.",
    # Fun/Experience-focused
    "I'm here for the creative experience and the thrill of building something in 24 hours.",
    "I want to have fun while challenging myself technically.",
    "I'm curious about hackathons and wanted to experience one firsthand.",
    "I enjoy the collaborative atmosphere of hackathons and the energy they generate.",
    "I'm looking for a break from my routine and a chance to work on something different.",
]
# Technical stack components
#
# Maps each placeholder name used in TECHNICAL_BACKGROUNDS to the list of
# strings it may expand to. generate_background() picks one entry per
# placeholder with random.choice(). All values are strings, including "years"
# ("1" through "15"), because they are inserted with str.replace().
STACK_COMPONENTS = {
    "years": [str(i) for i in range(1, 16)],
    "stack": [
        "JavaScript/TypeScript and Python", "MERN stack", "MEAN stack", "Ruby on Rails",
        "Django and React", "Vue.js and Node.js", "PHP and Laravel", "Java Spring Boot",
        ".NET and Angular", "Go and React", "Python Flask and Vue.js"
    ],
    "backend_tech": [
        "Node.js and Express", "Django and PostgreSQL", "Ruby on Rails", "Java Spring Boot",
        "ASP.NET Core", "PHP and Laravel", "Go microservices", "Python FastAPI",
        "GraphQL APIs", "Serverless architectures on AWS"
    ],
    "frontend_tech": [
        "React and Redux", "Angular and RxJS", "Vue.js and Vuex", "Svelte and SvelteKit",
        "Next.js", "Gatsby", "React Native", "Flutter", "TypeScript and Material UI",
        "Tailwind CSS and Alpine.js"
    ],
    "mobile_tech": [
        "React Native", "Flutter", "Swift for iOS", "Kotlin for Android",
        "Xamarin", "Ionic", "PWAs", "Unity for mobile games", "NativeScript",
        "Mobile AR/VR applications"
    ],
    "devops_tech": [
        "Kubernetes and Docker", "AWS infrastructure", "Azure DevOps", "Google Cloud Platform",
        "CI/CD pipelines", "Terraform and infrastructure as code", "Jenkins and GitLab CI",
        "Monitoring and observability tools", "Site Reliability Engineering practices",
        "Security automation"
    ],
    "ds_tech": [
        "Python, Pandas, and scikit-learn", "R and Tidyverse", "SQL and data warehousing",
        "Tableau and data visualization", "Big data technologies like Spark",
        "ETL pipelines", "Statistical analysis", "A/B testing methodologies",
        "Natural Language Processing", "Computer Vision"
    ],
    "ml_tech": [
        "TensorFlow and Keras", "PyTorch", "scikit-learn", "deep learning models",
        "MLOps and model deployment", "reinforcement learning", "computer vision algorithms",
        "NLP models", "recommendation systems", "time series forecasting"
    ],
    "ai_tech": [
        "large language models", "generative AI", "conversational agents", "computer vision systems",
        "reinforcement learning", "multimodal AI", "AI ethics and responsible AI",
        "autonomous systems", "AI for social good", "explainable AI"
    ],
    "game_tech": [
        "Unity", "Unreal Engine", "Godot", "WebGL", "AR/VR development",
        "mobile game development", "game AI", "procedural generation",
        "multiplayer networking", "game physics"
    ],
    "embedded_tech": [
        "Arduino", "Raspberry Pi", "IoT devices", "embedded Linux",
        "RTOS", "C/C++ for microcontrollers", "sensor networks",
        "firmware development", "hardware interfaces", "low-power systems"
    ],
    "product_type": [
        "SaaS", "mobile", "enterprise", "consumer", "AI-powered",
        "IoT", "fintech", "healthtech", "edtech", "e-commerce"
    ],
    "qa_tech": [
        "automated testing", "Selenium and Cypress", "performance testing",
        "security testing", "mobile app testing", "API testing",
        "test-driven development", "behavior-driven development",
        "continuous integration testing", "accessibility testing"
    ],
    "writing_focus": [
        "API documentation", "user guides", "developer tutorials",
        "knowledge bases", "technical blogs", "software requirements",
        "open source documentation", "technical specifications",
        "UX writing", "compliance documentation"
    ],
    "arch_focus": [
        "cloud architectures", "microservices", "serverless",
        "enterprise systems", "distributed systems", "API design",
        "security architectures", "data platforms", "IoT systems",
        "mobile and web applications"
    ],
    # Optional closing sentence appended by every technical template.
    "additional": [
        "I enjoy working in collaborative environments.",
        "I'm passionate about creating accessible technology.",
        "I've contributed to several open source projects.",
        "I'm interested in ethical technology and responsible innovation.",
        "I enjoy mentoring junior developers.",
        "I have a background in design thinking.",
        "I've worked in startups and enterprise environments.",
        "I'm particularly interested in AI ethics.",
        "I love solving complex algorithmic problems.",
        "I focus on creating user-centered solutions.",
        "I have experience leading small technical teams.",
        "I'm self-taught and constantly learning new technologies.",
        "I have a computer science degree but learned most of my skills on the job.",
        "I'm currently transitioning careers into tech.",
        "I'm an advocate for diversity in tech.",
        "I've organized tech meetups and community events.",
        "I'm interested in the intersection of technology and sustainability.",
        "I have experience in both technical and business roles.",
        "I'm passionate about making technology more accessible to everyone.",
        "I enjoy the challenges of working with legacy systems.",
        "",  # Empty for some participants
    ]
}
# Non-technical components
#
# Maps each placeholder name used in NON_TECHNICAL_BACKGROUNDS to the list of
# strings it may expand to. generate_background(is_technical=False) picks one
# entry per placeholder with random.choice(). "years" and "additional" share
# their key names with STACK_COMPONENTS but are separate lists.
NON_TECH_COMPONENTS = {
    "marketing_focus": [
        "digital marketing", "content strategy", "brand development",
        "social media campaigns", "event promotion", "growth hacking",
        "community building", "influencer partnerships", "SEO/SEM",
        "product marketing"
    ],
    "industry": [
        "technology", "healthcare", "finance", "education", "retail",
        "entertainment", "manufacturing", "non-profit", "government",
        "hospitality"
    ],
    "years": [str(i) for i in range(1, 16)],
    "pm_focus": [
        "agile methodologies", "waterfall approaches", "hybrid frameworks",
        "technical projects", "creative initiatives", "product launches",
        "organizational change", "international teams", "startup environments",
        "enterprise transformations"
    ],
    "startup_type": [
        "tech", "social impact", "e-commerce", "healthcare", "education",
        "fintech", "sustainability", "B2B SaaS", "consumer app", "AI/ML"
    ],
    "field": [
        "Computer Science", "Business Administration", "Design", "Marketing",
        "Engineering", "Data Science", "Psychology", "Communications",
        "Information Technology", "Entrepreneurship"
    ],
    "university": [
        "University of Toronto", "York University", "Ryerson University",
        "Seneca College", "Humber College", "OCAD University",
        "George Brown College", "McMaster University", "Waterloo University",
        "Queen's University"
    ],
    "content_focus": [
        "tech tutorials", "industry trends", "career development",
        "product reviews", "educational content", "lifestyle and tech",
        "startup stories", "coding challenges", "design inspiration",
        "thought leadership"
    ],
    "sales_industry": [
        "SaaS", "hardware", "consulting services", "enterprise solutions",
        "consumer tech", "B2B technology", "telecommunications",
        "cybersecurity", "cloud services", "digital transformation"
    ],
    # Optional closing sentence appended by every non-technical template.
    "additional": [
        "I'm excited to learn more about technology and how it can solve real problems.",
        "I bring a unique perspective from my non-technical background.",
        "I'm interested in the human aspects of technology.",
        "I'm looking to collaborate with technical team members and contribute my skills.",
        "I have strong communication and presentation skills.",
        "I excel at understanding user needs and translating them into requirements.",
        "I'm good at explaining complex concepts to diverse audiences.",
        "I have experience managing stakeholder expectations.",
        "I'm skilled at identifying market opportunities.",
        "I enjoy bridging the gap between technical and non-technical teams.",
        "I have a creative approach to problem-solving.",
        "I'm passionate about user experience and accessibility.",
        "I have a network of industry connections that could be valuable.",
        "I'm experienced in gathering and synthesizing user feedback.",
        "I'm interested in how technology can create social impact.",
        "I have experience in project coordination and team organization.",
        "I'm good at creating compelling narratives around technical products.",
        "I'm curious about AI and its potential applications.",
        "I have a background in psychology and understand human behavior.",
        "I'm skilled at facilitating workshops and brainstorming sessions.",
        "",  # Empty for some participants
    ]
}
def generate_background(is_technical=True):
    """Generate a realistic background sentence for a participant.

    A template is picked at random from ``TECHNICAL_BACKGROUNDS`` (or
    ``NON_TECHNICAL_BACKGROUNDS``), and every ``{placeholder}`` it contains is
    replaced by a random entry from the matching component table.

    Args:
        is_technical: When true, use the technical templates together with
            ``STACK_COMPONENTS``; otherwise use the non-technical templates
            together with ``NON_TECH_COMPONENTS``.

    Returns:
        The filled-in background text, without leading or trailing whitespace.
    """
    if is_technical:
        template = random.choice(TECHNICAL_BACKGROUNDS)
        components = STACK_COMPONENTS
    else:
        template = random.choice(NON_TECHNICAL_BACKGROUNDS)
        components = NON_TECH_COMPONENTS

    # Fill in the template with random components.
    background = template
    for key, options in components.items():
        placeholder = "{" + key + "}"
        # Only draw a value for placeholders this template actually uses.
        if placeholder in background:
            background = background.replace(placeholder, random.choice(options))

    # "additional" may expand to the empty string; every template ends with
    # ". {additional}", so without the strip the text would end in a space.
    return background.strip()
def generate_linkedin_profile(name):
    """Generate a plausible LinkedIn profile path based on the name.

    The name is reduced to its alphanumeric characters and lower-cased, then
    one of three shapes is chosen at random:

    * 30%: the bare name, e.g. ``janedoe``
    * 30%: the name followed by a number from 1 to 999, e.g. ``janedoe42``
    * 40%: the name, a hyphen and a short suffix, e.g. ``janedoe-dev``

    The random variation makes collisions less likely but does not guarantee
    that two calls return different profiles.

    Args:
        name: Display name of the participant.

    Returns:
        A string of the form ``linkedin.com/in/<profile>`` (no URL scheme).
    """
    # Remove spaces and special characters, convert to lowercase.
    name_part = "".join(ch for ch in name if ch.isalnum()).lower()

    # Draw once so the thresholds below are cumulative. Drawing a second
    # number in the elif would skew the split to roughly 30/42/28.
    roll = random.random()
    if roll < 0.3:
        # Some people use just their name.
        profile = name_part
    elif roll < 0.6:
        # Some add a random number.
        profile = f"{name_part}{random.randint(1, 999)}"
    else:
        # Some add their profession or location.
        suffixes = ["dev", "tech", "to", "canada", "design", "pm", "product", "marketing", "ai"]
        profile = f"{name_part}-{random.choice(suffixes)}"
    return f"linkedin.com/in/{profile}"
def _build_participant(is_technical):
    """Create one synthetic participant record with the given background type."""
    name = fake.name()
    email = fake.email()
    return {
        "email": email,
        "name": name,
        "linkedin_profile": generate_linkedin_profile(name),
        "background": generate_background(is_technical=is_technical),
        "goals": random.choice(GOALS),
    }


def generate_participants(count=100):
    """Generate a specified number of diverse hackathon participants.

    70% of the participants (rounded down) receive a technical background and
    the remainder a non-technical one. The result is shuffled so the two
    groups are mixed.

    Args:
        count: Total number of participants to create. Values of zero or
            below yield an empty list.

    Returns:
        A list of ``count`` dicts with the keys ``email``, ``name``,
        ``linkedin_profile``, ``background`` and ``goals``.
    """
    # For a hackathon we want more technical participants but still a good
    # mix. Integer arithmetic is used on purpose: int(count * 0.7) truncates
    # floating-point error and, for example, turns 90 into 62 instead of 63.
    technical_count = count * 7 // 10  # 70% technical
    non_technical_count = count - technical_count  # 30% non-technical

    # One flag per participant; a negative multiplier produces an empty list.
    background_flags = [True] * technical_count + [False] * non_technical_count
    participants = [_build_participant(flag) for flag in background_flags]

    # Shuffle the participants to mix technical and non-technical.
    random.shuffle(participants)
    return participants
def main(count=100):
    """Generate participants and save them to the database.

    Initializes the database, creates ``count`` synthetic participants, stores
    each one with ``add_participant`` and prints the resulting total together
    with a small random sample of rows.

    Args:
        count: Number of participants to generate. Defaults to 100.
    """
    print("Initializing database...")
    initialize_database()

    print(f"Generating {count} diverse hackathon participants...")
    participants = generate_participants(count)

    print("Adding participants to the database...")
    for participant in participants:
        add_participant(participant)
    print("Participants added successfully.")

    # Get and display a sample of the participants.
    df = get_participants_dataframe()
    print(f"\nTotal participants in database: {len(df)}")
    print("\nSample of participants:")
    # DataFrame.sample raises ValueError when asked for more rows than exist,
    # so never request more than the table holds.
    print(df.sample(min(5, len(df))))
if __name__ == "__main__":
    # Check if Faker is installed.
    # NOTE(review): the module-level `from faker import Faker` at the top of
    # this file already raises ImportError before execution gets here, so this
    # message can only be shown if that import is made lazy or guarded too.
    try:
        import faker  # noqa: F401  (imported only to test availability)
    except ImportError:
        print("The 'faker' package is required but not installed.")
        print("Please install it using: pip install faker")
        sys.exit(1)
    main()