# NOTE: removed non-code residue ("Spaces:" / "Running" hosting-UI status
# text captured during extraction) that would break this Python file.
# --- Dependencies ---------------------------------------------------------
import streamlit as st
import pandas as pd
import sqlite3
import os
from datetime import datetime
import time

from scraper import LinkedInScraper
from email_gen import EmailGenerator

# --- Streamlit page configuration ----------------------------------------
st.set_page_config(
    page_title="Cold Email Outreach Assistant",
    page_icon="π§",
    layout="wide",
)

# --- Session-state defaults ----------------------------------------------
# Survive reruns: the last processed batch, and the (expensive to build)
# email-generator instance. setdefault only assigns when the key is absent,
# matching the original "if key not in session_state" guards.
st.session_state.setdefault('processed_data', None)
st.session_state.setdefault('email_generator', None)
def init_database(db_path='leads.db'):
    """Create the SQLite cache table if it does not exist yet.

    Args:
        db_path: Path of the SQLite database file. Defaults to the
            historical hard-coded 'leads.db' so existing callers are
            unaffected.
    """
    conn = sqlite3.connect(db_path)
    try:
        # "with conn" wraps the DDL in a transaction: commit on success,
        # rollback on error. It does NOT close the connection — the
        # finally block does, so we no longer leak it on failure.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS scraped_data (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT,
                    email TEXT,
                    company TEXT,
                    linkedin_url TEXT,
                    scraped_info TEXT,
                    generated_subject TEXT,
                    generated_email TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
    finally:
        conn.close()
def save_to_database(data, db_path='leads.db'):
    """Persist a batch of processed leads into the cache table.

    NOTE(review): the table's only key is the AUTOINCREMENT id, so
    'INSERT OR REPLACE' can never hit a conflict and every call simply
    appends rows. Kept as-is to preserve behavior; add a UNIQUE(email)
    constraint in init_database if deduplication is actually wanted.

    Args:
        data: DataFrame with name/email/company/linkedin_url columns;
            scraped_info / generated_subject / generated_email are optional
            and default to ''.
        db_path: SQLite file to write to (default matches the historical
            hard-coded 'leads.db').
    """
    # Build all parameter tuples up front so the DB work is one
    # executemany call inside a single transaction instead of a
    # per-row execute + one big commit at the end.
    rows = [
        (
            row['name'], row['email'], row['company'], row['linkedin_url'],
            row.get('scraped_info', ''), row.get('generated_subject', ''),
            row.get('generated_email', ''),
        )
        for _, row in data.iterrows()
    ]
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commit on success, rollback on error
            conn.executemany('''
                INSERT OR REPLACE INTO scraped_data
                (name, email, company, linkedin_url, scraped_info, generated_subject, generated_email)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            ''', rows)
    finally:
        conn.close()  # no longer leaked when an insert raises
def load_from_database(db_path='leads.db'):
    """Return every cached row as a DataFrame, newest first.

    Args:
        db_path: SQLite file to read (default matches the historical
            hard-coded 'leads.db').

    Returns:
        pandas.DataFrame with all columns of scraped_data, ordered by
        created_at descending.
    """
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql_query(
            'SELECT * FROM scraped_data ORDER BY created_at DESC', conn
        )
    finally:
        # Close even when read_sql_query raises (e.g. table missing);
        # the original leaked the connection on that path.
        conn.close()
def main():
    """Render the Streamlit UI: sidebar configuration plus three workflow
    tabs (upload & process, results review, processing history)."""
    st.title("π§ Cold Email Outreach Assistant")
    st.markdown("Upload your leads CSV and generate personalized cold emails using AI")

    # The cache table must exist before any tab reads or writes it.
    init_database()

    # ------------- Sidebar: all user-tunable settings -------------
    with st.sidebar:
        st.header("βοΈ Configuration")

        st.subheader("AI Model Settings")
        model_option = st.selectbox(
            "Model Type",
            ["Download Vicuna-7B (Recommended)", "Use Custom Model Path"]
        )
        if model_option == "Use Custom Model Path":
            custom_model_path = st.text_input("Custom Model Path", "")
        else:
            custom_model_path = None

        st.subheader("π§ Email Generation")
        tone = st.selectbox(
            "Email Tone",
            ["Professional", "Friendly", "Direct", "Authoritative"],
            index=0,
            help="Choose the tone for generated emails"
        )
        temperature = st.slider(
            "Creativity Level",
            min_value=0.3,
            max_value=1.0,
            value=0.7,
            step=0.1,
            help="Lower = more conservative, Higher = more creative"
        )
        generate_variations = st.checkbox(
            "Generate Multiple Variations",
            value=False,
            help="Generate 3 different email variations per lead"
        )

        st.subheader("π Scraping Settings")
        scrape_timeout = st.slider("Scrape Timeout (seconds)", 5, 30, 10)
        use_selenium = st.checkbox("Use Selenium (slower but more reliable)", value=False)

    upload_tab, results_tab, history_tab = st.tabs(
        ["π€ Upload & Process", "π Results", "π History"]
    )

    # ------------- Tab 1: upload CSV and kick off processing -------------
    with upload_tab:
        st.header("Upload Your Leads CSV")
        uploaded = st.file_uploader(
            "Choose a CSV file",
            type="csv",
            help="CSV should contain columns: name, email, company, linkedin_url"
        )
        if uploaded is not None:
            try:
                leads = pd.read_csv(uploaded)
                required_columns = ['name', 'email', 'company', 'linkedin_url']
                missing_columns = [col for col in required_columns if col not in leads.columns]
                if missing_columns:
                    st.error(f"Missing required columns: {', '.join(missing_columns)}")
                    st.info("Required columns: name, email, company, linkedin_url")
                else:
                    st.success(f"β CSV loaded successfully! Found {len(leads)} leads")
                    st.dataframe(leads.head())
                    if st.button("π Start Processing", type="primary"):
                        process_leads(leads, scrape_timeout, use_selenium,
                                      custom_model_path, tone, temperature,
                                      generate_variations)
            except Exception as e:
                st.error(f"Error reading CSV: {str(e)}")

    # ------------- Tab 2: review the most recent batch -------------
    with results_tab:
        st.header("Processing Results")
        results = st.session_state.processed_data
        if results is not None:
            st.success(f"β Processed {len(results)} leads successfully!")
            for idx, row in results.iterrows():
                marker = 'π―' if row.get('tone_used') else ''
                with st.expander(f"π {row['name']} - {row['company']} {marker}"):
                    col1, col2, col3 = st.columns([2, 3, 1])

                    with col1:
                        st.subheader("π Scraped Information")
                        st.text_area("Company Info", row.get('scraped_info', 'No info scraped'),
                                     height=100, key=f"info_{idx}")
                        # Generation settings are only present on newer rows.
                        if row.get('tone_used'):
                            st.write(f"**Tone:** {row.get('tone_used', 'N/A')}")
                            st.write(f"**Temperature:** {row.get('temperature_used', 'N/A')}")

                    with col2:
                        st.subheader("π§ Generated Email")
                        subject = row.get('generated_subject', 'No subject generated')
                        email_body = row.get('generated_email', 'No email generated')
                        st.text_area("Subject", subject, height=50, key=f"subject_{idx}")
                        st.text_area("Email Body", email_body, height=250, key=f"email_{idx}")

                    with col3:
                        st.subheader("π Quality")
                        if subject and email_body:
                            subject_len = len(subject)
                            # Measure only the primary email, not the appended
                            # variations block.
                            main_body = email_body.split('--- VARIATIONS ---')[0].strip()
                            body_words = len(main_body.split())

                            # Evaluate each criterion once; reused below for
                            # both the indicators and the overall score.
                            subject_ok = 15 <= subject_len <= 65
                            body_ok = 25 <= body_words <= 100
                            personalized = row['name'] in main_body and row['company'] in main_body
                            cta_words = ['call', 'conversation', 'chat', 'discuss', 'talk', 'meeting']
                            has_cta = any(word in main_body.lower() for word in cta_words)

                            if subject_ok:
                                st.success(f"β Subject: {subject_len} chars")
                            else:
                                st.warning(f"β οΈ Subject: {subject_len} chars")

                            if body_ok:
                                st.success(f"β Body: {body_words} words")
                            else:
                                st.warning(f"β οΈ Body: {body_words} words")

                            if '[Your Name]' in email_body or '{' in email_body:
                                st.error("β Contains placeholders")
                            else:
                                st.success("β No placeholders")

                            if personalized:
                                st.success("β Well personalized")
                            else:
                                st.warning("β οΈ Low personalization")

                            if has_cta:
                                st.success("β Has call-to-action")
                            else:
                                st.warning("β οΈ Weak call-to-action")

                            # Weighted score from the same criteria; note the
                            # placeholder term deliberately ignores '{' even
                            # though the indicator above checks for it.
                            quality_score = (
                                20 * subject_ok
                                + 25 * body_ok
                                + 25 * ('[Your Name]' not in email_body)
                                + 20 * personalized
                                + 10 * has_cta
                            )
                            if quality_score >= 80:
                                st.success(f"π Overall: {quality_score}% - Ready to send!")
                            elif quality_score >= 60:
                                st.warning(f"π Overall: {quality_score}% - Needs polish")
                            else:
                                st.error(f"π§ Overall: {quality_score}% - Needs work")

                        # One-box copy/paste of the full email.
                        # NOTE(review): the flattened source made the original
                        # indentation ambiguous here; placed at column level,
                        # outside the quality-check branch — confirm.
                        email_text = f"Subject: {subject}\n\n{email_body}"
                        st.text_area("Copy Email", email_text, height=100, key=f"copy_{idx}")

            # Export the whole batch.
            if st.button("π₯ Export to CSV"):
                csv_data = results.to_csv(index=False)
                st.download_button(
                    label="β¬οΈ Download CSV",
                    data=csv_data,
                    file_name=f"cold_emails_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime="text/csv"
                )
        else:
            st.info("π Upload and process a CSV file to see results here")

    # ------------- Tab 3: historical records from SQLite -------------
    with history_tab:
        st.header("Processing History")
        try:
            history = load_from_database()
            if history.empty:
                st.info("No historical data found")
            else:
                st.dataframe(history)
                if st.button("π₯ Export History"):
                    st.download_button(
                        label="β¬οΈ Download History CSV",
                        data=history.to_csv(index=False),
                        file_name=f"email_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                        mime="text/csv"
                    )
        except Exception as e:
            st.error(f"Error loading history: {str(e)}")
def process_leads(df, scrape_timeout, use_selenium, custom_model_path, tone, temperature, generate_variations):
    """Scrape each lead, generate email(s), and store the results.

    Results go into st.session_state.processed_data and the SQLite cache.

    Fixes vs. the original:
    - Progress/status used the DataFrame *index label* (idx + 1), which is
      wrong whenever df does not have a clean 0..n-1 RangeIndex (e.g. after
      filtering). Now tracks an explicit 0-based position via enumerate.
    - An empty input frame no longer crashes the metrics summary (the
      original indexed columns that don't exist on an empty DataFrame).

    Args:
        df: Leads with name/email/company/linkedin_url columns.
        scrape_timeout: Per-site scrape timeout in seconds.
        use_selenium: Whether the scraper should drive a browser.
        custom_model_path: Optional model path; None downloads the default.
        tone: Tone label passed through to the generator.
        temperature: Sampling temperature passed through to the generator.
        generate_variations: When True, produce 3 variations per lead and
            append them to the primary email body.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()
    try:
        status_text.text("π§ Initializing scraper...")
        scraper = LinkedInScraper(timeout=scrape_timeout, use_selenium=use_selenium)

        status_text.text("π€ Initializing AI model...")
        # Model load is expensive; reuse the instance cached in session state.
        if st.session_state.email_generator is None:
            st.session_state.email_generator = EmailGenerator(custom_model_path)
        email_gen = st.session_state.email_generator

        processed_data = []
        total_leads = len(df)
        # enumerate() gives the true 0-based position regardless of index labels.
        for pos, (_, row) in enumerate(df.iterrows()):
            status_text.text(f"π Processing {row['name']} ({pos + 1}/{total_leads})")

            scraped_info = scraper.scrape_linkedin_or_company(
                row['linkedin_url'],
                row['company']
            )

            status_text.text(f"βοΈ Generating email for {row['name']} ({tone} tone)...")
            if generate_variations:
                variations = email_gen.generate_multiple_variations(
                    row['name'],
                    row['company'],
                    scraped_info,
                    num_variations=3,
                    tone=tone
                )
                # First variation is the primary email; the rest are appended
                # as a readable block after a marker the UI strips back off.
                subject = variations[0]['subject']
                email_body = variations[0]['email_body']
                variations_text = "\n\n--- VARIATIONS ---\n"
                for i, var in enumerate(variations, 1):
                    variations_text += f"\nVariation {i} ({var['tone']}):\n"
                    variations_text += f"Subject: {var['subject']}\n"
                    variations_text += f"Body: {var['email_body']}\n"
                email_body += variations_text
            else:
                subject, email_body = email_gen.generate_email(
                    row['name'],
                    row['company'],
                    scraped_info,
                    tone=tone,
                    temperature=temperature
                )

            processed_data.append({
                'name': row['name'],
                'email': row['email'],
                'company': row['company'],
                'linkedin_url': row['linkedin_url'],
                'scraped_info': scraped_info,
                'generated_subject': subject,
                'generated_email': email_body,
                'tone_used': tone,
                'temperature_used': temperature
            })
            progress_bar.progress((pos + 1) / total_leads)

        result_df = pd.DataFrame(processed_data)
        st.session_state.processed_data = result_df
        save_to_database(result_df)

        status_text.text("β Processing completed!")
        st.success("π All leads processed successfully!")

        # Guard: an empty batch has no generated_subject/email columns.
        if not result_df.empty:
            avg_subject_length = result_df['generated_subject'].str.len().mean()
            avg_body_length = result_df['generated_email'].str.split().str.len().mean()
            st.info(f"π Quality Metrics: Avg subject length: {avg_subject_length:.0f} chars, Avg body length: {avg_body_length:.0f} words")
    except Exception as e:
        st.error(f"β Error during processing: {str(e)}")
        status_text.text("β Processing failed")
# Entry point when executed directly (streamlit run app.py).
if __name__ == "__main__":
    main()