Spaces:

ahmednoorx
/

cold-email-assistant

Running

App Files Files Community

cold-email-assistant / email_gen.py

ahmednoorx

Update email_gen.py

0d94680 verified about 2 months ago

raw

history blame

25.4 kB

	import os
	import json
	from llama_cpp import Llama
	import re
	from huggingface_hub import hf_hub_download
	import random

	class EmailGenerator:
	def __init__(self, custom_model_path=None):
	self.model = None
	self.model_path = custom_model_path or self._download_model()
	self._load_model()
	self.prompt_templates = self._load_prompt_templates()

	def _download_model(self):
	    """Fetch the quantized Vicuna-7B GGUF file from the Hugging Face Hub.

	    Returns:
	        The local path reported by ``hf_hub_download``, or ``None`` when
	        anything in the download step raises.
	    """
	    repo = "TheBloke/vicuna-7B-v1.5-GGUF"
	    gguf_file = "vicuna-7b-v1.5.Q4_K_M.gguf"
	    try:
	        print("Downloading Vicuna-7B model... This may take a while.")
	        local_path = hf_hub_download(repo_id=repo, filename=gguf_file, cache_dir="./models")
	        print(f"Model downloaded to: {local_path}")
	        return local_path
	    except Exception as err:
	        # Best effort: the caller treats None as "no model available".
	        print(f"Error downloading model: {err}")
	        return None

	def _load_model(self):
	"""Load the GGUF model using llama-cpp-python"""
	try:
	if self.model_path and os.path.exists(self.model_path):
	print(f"🤖 Loading language model from: {self.model_path}")
	self.model = Llama(
	model_path=self.model_path,
	n_ctx=2048, # Context length
	n_threads=2, # Reduced for stability
	n_batch=512, # Batch size
	verbose=False,
	use_mmap=True, # Memory mapping for efficiency
	use_mlock=False # Don't lock memory
	)
	print("✅ Model loaded successfully!")

	# Test the model with a simple prompt
	test_response = self.model("Test", max_tokens=5, temperature=0.1)
	if test_response and 'choices' in test_response:
	print("✅ Model test successful")
	else:
	print("⚠️ Model test failed, will use fallback")
	self.model = None

	else:
	print("❌ No valid model path found. Using advanced fallback generation.")
	self.model = None
	except Exception as e:
	print(f"❌ Error loading model: {e}")
	print("🔄 Will use advanced fallback generation system")
	self.model = None

	def _generate_with_model(self, prompt, max_tokens=250, temperature=0.7):
	"""Generate text using the loaded model with retry logic"""
	try:
	if self.model:
	# First attempt
	response = self.model(
	prompt,
	max_tokens=max_tokens,
	temperature=temperature,
	top_p=0.9,
	stop=["</s>", "\n\n\n", "EXAMPLE", "Now write"],
	echo=False
	)
	result = response['choices'][0]['text'].strip()

	# Check if result is valid
	if self._is_valid_output(result):
	return result

	# Retry with different temperature if first attempt failed
	print("First attempt failed, retrying with adjusted parameters...")
	response = self.model(
	prompt,
	max_tokens=max_tokens,
	temperature=min(temperature + 0.2, 1.0),
	top_p=0.8,
	stop=["</s>", "\n\n\n", "EXAMPLE", "Now write"],
	echo=False
	)
	return response['choices'][0]['text'].strip()
	else:
	return self._fallback_generation(prompt)
	except Exception as e:
	print(f"Error generating with model: {e}")
	return self._fallback_generation(prompt)

	def _is_valid_output(self, output):
	"""Check if the generated output is valid"""
	if not output or len(output) < 20:
	return False

	# Check for incomplete JSON
	if '{' in output and '}' not in output:
	return False

	# Check for common failure patterns
	failure_patterns = [
	'I cannot', 'I apologize', 'I\'m sorry',
	'[Your Name]', '[Company]', '[Product]',
	'EXAMPLE', 'Now write'
	]

	return not any(pattern in output for pattern in failure_patterns)

	def _parse_json_response(self, response):
	"""Parse JSON response from the model"""
	try:
	# Clean up the response
	response = response.strip()

	# Extract JSON if it's embedded in text
	json_match = re.search(r'\{[^}]"subject"[^}]\}', response, re.DOTALL)
	if json_match:
	response = json_match.group(0)

	# Parse JSON
	data = json.loads(response)

	subject = data.get('subject', '').strip()
	body = data.get('body', '').strip()

	# Clean up quotes and formatting
	subject = subject.strip('"\'')
	body = body.strip('"\'')

	return subject, body

	except (json.JSONDecodeError, KeyError) as e:
	print(f"JSON parsing error: {e}")
	return self._extract_fallback_content(response)

	def _extract_fallback_content(self, response):
	"""Extract subject and body from non-JSON response"""
	lines = response.split('\n')
	subject = ""
	body = ""

	# Look for subject line
	for line in lines:
	if any(word in line.lower() for word in ['subject:', 'subj:', 'sub:']):
	subject = re.sub(r'^[^:]*:', '', line).strip()
	break

	# Look for body
	body_started = False
	body_lines = []
	for line in lines:
	if body_started:
	if line.strip():
	body_lines.append(line.strip())
	elif any(word in line.lower() for word in ['body:', 'email:', 'hi ', 'dear ', 'hello ']):
	body_started = True
	clean_line = re.sub(r'^[^:]*:', '', line).strip()
	if clean_line and not clean_line.lower().startswith(('body', 'email')):
	body_lines.append(clean_line)

	body = '\n'.join(body_lines) if body_lines else response

	# Fallback if parsing failed
	if not subject:
	subject = f"Partnership opportunity"
	if not body or len(body) < 20:
	body = "Hi,\n\nI'd love to explore how we can help your business grow.\n\nInterested in a quick call?\n\nBest regards"

	return subject, body

	def _advanced_fallback_generation(self, name, company, company_info, tone="Professional"):
	"""Advanced fallback with company-specific personalization"""

	# Extract industry and key details from company info
	industry_hints = self._extract_industry_details(company_info)

	# Create tone-specific templates
	if tone.lower() == "friendly":
	templates = [
	{
	"subject": f"Love what {company} is doing{industry_hints['subject_suffix']}",
	"body": f"Hi {name},\n\nJust came across {company}{industry_hints['context']} - really impressive work!\n\nWe've helped similar {industry_hints['industry']} companies {industry_hints['benefit']}. Mind if I share a quick example?\n\n15-minute call work for you?\n\nCheers,\nAlex"
	},
	{
	"subject": f"Quick idea for {company}",
	"body": f"Hi {name},\n\n{company}'s {industry_hints['focus']} caught my eye. We just helped a similar company {industry_hints['specific_result']}.\n\nWorth exploring for {company}?\n\nBest,\nSam"
	}
	]
	elif tone.lower() == "direct":
	templates = [
	{
	"subject": f"{company} + {industry_hints['solution']}?",
	"body": f"Hi {name},\n\n{industry_hints['direct_opener']} for {company}.\n\nResult: {industry_hints['specific_result']}.\n\nInterested? 10-minute call?\n\n-Alex"
	},
	{
	"subject": f"ROI opportunity for {company}",
	"body": f"{name},\n\nQuick question: Is {company} looking to {industry_hints['goal']}?\n\nWe reduced costs by 35% for a similar {industry_hints['industry']} company.\n\nWorth a conversation?\n\nBest,\nSam"
	}
	]
	else: # Professional
	templates = [
	{
	"subject": f"Operational efficiency opportunity - {company}",
	"body": f"Hi {name},\n\nI noticed {company} specializes in {industry_hints['specialty']}. We recently helped a similar organization {industry_hints['professional_result']}.\n\nWould you be open to a brief conversation about how this might apply to {company}?\n\nBest regards,\nAlex Thompson"
	},
	{
	"subject": f"Thought on {company}'s {industry_hints['focus']}",
	"body": f"Hi {name},\n\n{company}'s work in {industry_hints['area']} is impressive. We've developed solutions that help {industry_hints['industry']} companies {industry_hints['benefit']}.\n\nWould you be interested in a 15-minute discussion about potential applications for {company}?\n\nBest regards,\nSarah Chen"
	}
	]

	template = random.choice(templates)
	return template["subject"], template["body"]

	def _extract_industry_details(self, company_info):
	"""Extract industry-specific details for personalization"""
	info_lower = company_info.lower() if company_info else ""

	if any(word in info_lower for word in ['tech', 'software', 'saas', 'ai', 'digital']):
	return {
	'industry': 'tech',
	'specialty': 'technology solutions',
	'focus': 'innovation',
	'area': 'technology',
	'benefit': 'scale their platforms and reduce technical debt',
	'goal': 'optimize your development pipeline',
	'solution': 'DevOps automation',
	'context': ' and their tech stack',
	'subject_suffix': ' with tech',
	'direct_opener': 'We implemented automated testing',
	'specific_result': 'reduced deployment time by 60%',
	'professional_result': 'achieve 40% faster time-to-market for new features'
	}
	elif any(word in info_lower for word in ['manufactur', 'industrial', 'equipment', 'materials']):
	return {
	'industry': 'manufacturing',
	'specialty': 'industrial operations',
	'focus': 'production efficiency',
	'area': 'manufacturing',
	'benefit': 'optimize their production lines and reduce waste',
	'goal': 'increase production efficiency',
	'solution': 'process optimization',
	'context': ' and their manufacturing capabilities',
	'subject_suffix': ' in manufacturing',
	'direct_opener': 'We streamlined production workflows',
	'specific_result': 'increased throughput by 45%',
	'professional_result': 'achieve 30% improvement in production efficiency'
	}
	elif any(word in info_lower for word in ['health', 'medical', 'pharma', 'clinical']):
	return {
	'industry': 'healthcare',
	'specialty': 'healthcare solutions',
	'focus': 'patient outcomes',
	'area': 'healthcare',
	'benefit': 'improve patient outcomes while reducing costs',
	'goal': 'enhance patient care efficiency',
	'solution': 'workflow optimization',
	'context': ' and their patient care approach',
	'subject_suffix': ' in healthcare',
	'direct_opener': 'We optimized patient flow systems',
	'specific_result': 'reduced wait times by 50%',
	'professional_result': 'achieve 25% improvement in patient satisfaction scores'
	}
	else:
	return {
	'industry': 'business',
	'specialty': 'business operations',
	'focus': 'growth',
	'area': 'operations',
	'benefit': 'streamline operations and drive growth',
	'goal': 'scale your operations',
	'solution': 'process optimization',
	'context': ' and their business model',
	'subject_suffix': '',
	'direct_opener': 'We automated key business processes',
	'specific_result': 'increased efficiency by 40%',
	'professional_result': 'achieve 35% operational cost reduction'
	}

	def _load_prompt_templates(self):
	"""Return the prompt templates used for email generation.
	The result is a dict with two entries:
	- "few_shot_template": a str.format template with the fields {name},
	  {company}, {company_context} and {tone}; generate_email fills it in.
	- "industry_specific": a dict of short one-line prompts keyed by
	  "technology", "healthcare", "manufacturing", "services" and "default"
	  (the same strings that _extract_industry returns).
	"""
	# NOTE(review): nothing in this file appears to read "industry_specific";
	# confirm whether another module uses it.
	return {
	# The words "EXAMPLE" and "Now write" in this prompt are also listed as
	# stop sequences in _generate_with_model and as failure patterns in
	# _is_valid_output. The doubled braces on the last line of the template
	# are str.format escapes and render as single literal braces.
	"few_shot_template": '''You are an elite B2B sales copywriter. Write ONE personalized cold email that sounds natural and converts.

	<examples>
	EXAMPLE 1:
	SUBJECT: Quick question about Acme's EU expansion
	BODY: Hi Sarah,

	Saw Acme just launched in Berlin – congrats! We helped Contoso reduce their GDPR compliance prep by 68% with a simple automation.

	Worth a 10-minute chat about how this could apply to your EU rollout?

	Best,
	Alex

	EXAMPLE 2:
	SUBJECT: Thought on TechCorp's materials testing
	BODY: Hi John,

	Noticed TechCorp specializes in X-ray spectroscopy equipment. We just helped a similar lab increase throughput 40% with workflow optimization.

	Mind if I share what worked for them? 15-minute call?

	Best,
	Sam

	EXAMPLE 3:
	SUBJECT: Manufacturing efficiency idea for IndustrialCorp
	BODY: Hi Mike,

	IndustrialCorp's production line setup caught my attention. We automated similar processes for MetalWorks, reducing their cycle time by 35%.

	Open to a brief conversation about applications for your facility?

	Best regards,
	Jennifer
	</examples>

	Now write an email for:
	Name: {name}
	Company: {company}
	Company Info: {company_context}
	Tone: {tone}

	Requirements:
	- Use the company info naturally in the first 2 lines
	- Maximum 70 words in body (excluding signature)
	- Clear yes/no question at the end
	- No placeholders like [Your Name] or [Company]
	- Professional but conversational
	- Include specific benefit or result if possible

	Return ONLY this JSON format:
	{{"subject": "...", "body": "..."}}''',

	# One-sentence instructions per industry; these contain no format fields.
	"industry_specific": {
	"technology": '''Write a cold email for a tech company. Focus on efficiency, scalability, and competitive advantage.''',
	"healthcare": '''Write a cold email for a healthcare company. Focus on patient outcomes, compliance, and cost reduction.''',
	"manufacturing": '''Write a cold email for a manufacturing company. Focus on production efficiency, quality, and cost savings.''',
	"services": '''Write a cold email for a service company. Focus on client satisfaction, process improvement, and growth.''',
	"default": '''Write a cold email that focuses on business growth and operational efficiency.'''
	}
	}

	def _extract_industry(self, company_info):
	"""Extract industry type from company information"""
	company_lower = company_info.lower()

	if any(word in company_lower for word in ['tech', 'software', 'saas', 'ai', 'digital', 'app', 'platform']):
	return 'technology'
	elif any(word in company_lower for word in ['health', 'medical', 'pharma', 'hospital', 'clinic']):
	return 'healthcare'
	elif any(word in company_lower for word in ['manufactur', 'factory', 'production', 'industrial', 'equipment']):
	return 'manufacturing'
	elif any(word in company_lower for word in ['service', 'consulting', 'agency', 'firm']):
	return 'services'
	else:
	return 'default'

	def _create_company_context(self, company, company_info):
	"""Create focused company context for the prompt"""
	# Extract key information and clean it up
	context_parts = []

	if company_info and len(company_info) > 10:
	# Extract meaningful phrases
	sentences = re.split(r'[.!?]+', company_info)
	for sentence in sentences[:3]: # First 3 sentences
	sentence = sentence.strip()
	if len(sentence) > 20 and not sentence.startswith('Title:'):
	# Remove common fluff words
	sentence = re.sub(r'Description:\s*', '', sentence)
	sentence = re.sub(r'Company Website:\s*', '', sentence)
	sentence = re.sub(r'LinkedIn:\s*', '', sentence)
	if sentence:
	context_parts.append(sentence)

	if not context_parts:
	context_parts.append(f"{company} is a company in their industry")

	return ' \| '.join(context_parts[:2]) # Max 2 key points

	def generate_email(self, name, company, company_info, tone="Professional", temperature=0.7):
	"""Generate both subject and email body using advanced prompting"""
	# Clean up and prepare context
	company_context = self._create_company_context(company, company_info)

	# Calibrate temperature for production readiness
	if temperature > 1.0:
	temperature = 0.8 # Cap at 0.8 for production readiness

	# Try AI generation first
	if self.model:
	try:
	# Build the prompt using few-shot template
	prompt = self.prompt_templates["few_shot_template"].format(
	name=name,
	company=company,
	company_context=company_context,
	tone=tone.lower()
	)

	# Generate with model
	response = self._generate_with_model(prompt, max_tokens=200, temperature=temperature)

	# Parse the response
	subject, body = self._parse_json_response(response)

	# Polish the content
	subject, body = self._polish_email_content(subject, body)

	# Validate quality
	quality_score, issues = self._validate_email_quality(subject, body, name, company)

	# If quality is good enough, return it
	if quality_score >= 70 and 'placeholders' not in issues:
	print(f"✅ AI generated email (Quality: {quality_score}%)")
	return subject, body
	else:
	print(f"⚠️ AI output quality too low ({quality_score}%), using advanced fallback")

	except Exception as e:
	print(f"❌ AI generation failed: {e}, using advanced fallback")

	# Use advanced fallback system
	print("🔄 Using advanced fallback generation")
	subject, body = self._advanced_fallback_generation(name, company, company_info, tone)

	# Always polish fallback content
	subject, body = self._polish_email_content(subject, body)

	return subject, body

	def _clean_subject(self, subject, company):
	"""Clean and validate subject line"""
	if not subject or len(subject) < 5:
	return f"Quick question about {company}"

	# Remove common prefixes
	subject = re.sub(r'^(Subject\|SUBJECT):\s*', '', subject, flags=re.IGNORECASE)
	subject = subject.strip('"\'')

	# Ensure reasonable length
	if len(subject) > 60:
	subject = subject[:57] + "..."

	return subject

	def _clean_body(self, body, name):
	"""Clean and validate email body"""
	if not body or len(body) < 20:
	return f"Hi {name},\n\nI'd love to discuss how we can help your business grow.\n\nInterested in a quick call?\n\nBest regards"

	# Remove common prefixes
	body = re.sub(r'^(Body\|BODY\|Email\|EMAIL):\s*', '', body, flags=re.IGNORECASE)

	# Ensure proper greeting
	if not body.lower().startswith(('hi ', 'hello ', 'dear ')):
	body = f"Hi {name},\n\n{body}"

	# Ensure proper closing
	closing_patterns = ['best regards', 'best,', 'sincerely', 'regards,', 'cheers,']
	has_closing = any(pattern in body.lower() for pattern in closing_patterns)

	if not has_closing:
	if not body.endswith('\n'):
	body += '\n'
	body += '\nBest regards'

	return body

	def _polish_email_content(self, subject, body):
	"""Polish email content for grammar and professionalism"""

	# Fix common grammar issues
	body = re.sub(r'\s+', ' ', body) # Multiple spaces
	body = re.sub(r'([.!?])\s*([a-z])', r'\1 \2', body) # Space after punctuation
	body = re.sub(r'(\w)\s\n\s(\w)', r'\1\n\n\2', body) # Proper paragraph spacing

	# Ensure professional closing
	if not re.search(r'(Best regards\|Best\|Sincerely\|Cheers),?\s*\n?[A-Z][a-z]+', body):
	if body.strip().endswith(','):
	body = body.strip() + '\n\nBest regards,\nAlex'
	else:
	body = body.strip() + '\n\nBest regards,\nAlex'

	# Fix subject line
	subject = subject.strip()
	if len(subject) > 65:
	subject = subject[:62] + "..."

	# Capitalize first letter of subject if not already
	if subject and subject[0].islower():
	subject = subject[0].upper() + subject[1:]

	return subject, body

	def _validate_email_quality(self, subject, body, name, company):
	"""Validate email quality and return quality score"""
	issues = []

	# Check subject length
	if len(subject) < 10 or len(subject) > 65:
	issues.append("subject_length")

	# Check body length
	words = len(body.split())
	if words < 20 or words > 150:
	issues.append("body_length")

	# Check for placeholders
	if '[Your Name]' in body or '[Company]' in body or '{{' in body:
	issues.append("placeholders")

	# Check personalization
	if name not in body or company not in body:
	issues.append("personalization")

	# Check for call-to-action
	cta_phrases = ['call', 'conversation', 'chat', 'discuss', 'talk', 'meeting', 'connect']
	if not any(phrase in body.lower() for phrase in cta_phrases):
	issues.append("no_cta")

	quality_score = max(0, 100 - (len(issues) * 15))
	return quality_score, issues

	def generate_multiple_variations(self, name, company, company_info, num_variations=3, tone="Professional"):
	"""Generate multiple email variations with different approaches"""
	variations = []
	tones = ["Professional", "Friendly", "Direct"]
	temperatures = [0.6, 0.7, 0.8]

	for i in range(num_variations):
	current_tone = tones[i % len(tones)]
	current_temp = temperatures[i % len(temperatures)]

	subject, email_body = self.generate_email(
	name, company, company_info,
	tone=current_tone, temperature=current_temp
	)

	variations.append({
	'variation': i + 1,
	'tone': current_tone,
	'temperature': current_temp,
	'subject': subject,
	'email_body': email_body
	})

	return variations

	def generate_email_v2(self, recipient_name, recipient_email, company_name, company_data, tone="professional", temperature=0.7):
	"""Compatibility method for different calling signatures"""
	# Extract company info from company_data if it's a dict
	if isinstance(company_data, dict):
	company_info = company_data.get('description', f"Company: {company_name}")
	else:
	company_info = str(company_data) if company_data else f"Company: {company_name}"

	# Call the main generate_email method
	subject, body = self.generate_email(
	name=recipient_name,
	company=company_name,
	company_info=company_info,
	tone=tone,
	temperature=temperature
	)

	# Return in the expected format
	return {
	'subject': subject,
	'content': body,
	'quality_score': 8.0
	}