ahmednoorx committed on
Commit
baf4fd0
·
verified ·
1 Parent(s): 44937b2

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +384 -0
app.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import sqlite3
4
+ import os
5
+ from datetime import datetime
6
+ import time
7
+ from scraper import LinkedInScraper
8
+ from email_gen import EmailGenerator
9
+
10
# Streamlit page configuration -- must run before any other st.* call.
st.set_page_config(
    page_title="Cold Email Outreach Assistant",
    page_icon="πŸ“§",
    layout="wide",
)

# Seed session-state slots so later code can read them unconditionally.
for _state_key in ('processed_data', 'email_generator'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
22
+
23
def init_database(db_path='leads.db'):
    """Initialize the SQLite cache database.

    Creates the ``scraped_data`` table if it does not already exist.
    Safe to call repeatedly (idempotent).

    Args:
        db_path: Path to the SQLite database file. Defaults to the
            app-wide ``leads.db``, preserving the original behavior.
    """
    conn = sqlite3.connect(db_path)
    try:
        # try/finally guarantees the handle is closed even if the DDL
        # raises (the original leaked the connection on error).
        conn.execute('''
            CREATE TABLE IF NOT EXISTS scraped_data (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                email TEXT,
                company TEXT,
                linkedin_url TEXT,
                scraped_info TEXT,
                generated_subject TEXT,
                generated_email TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        conn.close()
44
+
45
def save_to_database(data, db_path='leads.db'):
    """Persist processed leads to the SQLite cache.

    Args:
        data: DataFrame with at least ``name``, ``email``, ``company`` and
            ``linkedin_url`` columns; ``scraped_info``,
            ``generated_subject`` and ``generated_email`` are optional and
            default to ''.
        db_path: SQLite file to write to. Defaults to ``leads.db``.
    """
    # NOTE(review): scraped_data has no UNIQUE constraint, so the
    # "OR REPLACE" clause never fires -- every call appends new rows.
    # Kept as-is to preserve existing behavior; add UNIQUE(email) to the
    # schema if true upserts are wanted.
    rows = [
        (
            row['name'], row['email'], row['company'], row['linkedin_url'],
            row.get('scraped_info', ''), row.get('generated_subject', ''),
            row.get('generated_email', ''),
        )
        for _, row in data.iterrows()
    ]

    conn = sqlite3.connect(db_path)
    try:
        # executemany: one driver-level loop instead of a Python-level
        # execute per row; try/finally closes the handle on error too.
        conn.executemany('''
            INSERT OR REPLACE INTO scraped_data
            (name, email, company, linkedin_url, scraped_info, generated_subject, generated_email)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        ''', rows)
        conn.commit()
    finally:
        conn.close()
63
+
64
def load_from_database(db_path='leads.db'):
    """Load all cached leads, newest first.

    Args:
        db_path: SQLite file to read. Defaults to ``leads.db``.

    Returns:
        DataFrame containing every row of ``scraped_data`` ordered by
        ``created_at`` descending.
    """
    conn = sqlite3.connect(db_path)
    try:
        # try/finally closes the handle even when the query fails, e.g.
        # when the table is missing because init_database never ran
        # (the original leaked the connection in that case).
        return pd.read_sql_query(
            'SELECT * FROM scraped_data ORDER BY created_at DESC', conn
        )
    finally:
        conn.close()
70
+
71
def main():
    """Render the Streamlit UI.

    Layout: a sidebar for model / email / scraping settings, plus three
    tabs -- upload & process, per-lead results with quality checks, and
    the processing history pulled from the SQLite cache.
    """
    st.title("πŸ“§ Cold Email Outreach Assistant")
    st.markdown("Upload your leads CSV and generate personalized cold emails using AI")

    # Initialize database (idempotent CREATE TABLE IF NOT EXISTS)
    init_database()

    # Sidebar for configuration
    with st.sidebar:
        st.header("βš™οΈ Configuration")

        # Model configuration
        st.subheader("AI Model Settings")
        model_option = st.selectbox(
            "Model Type",
            ["Download Vicuna-7B (Recommended)", "Use Custom Model Path"]
        )

        if model_option == "Use Custom Model Path":
            custom_model_path = st.text_input("Custom Model Path", "")
        else:
            # None is passed through to EmailGenerator -- presumably
            # selects the default model; confirm in email_gen.py.
            custom_model_path = None

        # Email generation settings
        st.subheader("πŸ“§ Email Generation")
        tone = st.selectbox(
            "Email Tone",
            ["Professional", "Friendly", "Direct", "Authoritative"],
            index=0,
            help="Choose the tone for generated emails"
        )

        temperature = st.slider(
            "Creativity Level",
            min_value=0.3,
            max_value=1.0,
            value=0.7,
            step=0.1,
            help="Lower = more conservative, Higher = more creative"
        )

        generate_variations = st.checkbox(
            "Generate Multiple Variations",
            value=False,
            help="Generate 3 different email variations per lead"
        )

        # Scraping configuration
        st.subheader("πŸ” Scraping Settings")
        scrape_timeout = st.slider("Scrape Timeout (seconds)", 5, 30, 10)
        use_selenium = st.checkbox("Use Selenium (slower but more reliable)", value=False)

    # Main content area
    tab1, tab2, tab3 = st.tabs(["πŸ“€ Upload & Process", "πŸ“Š Results", "πŸ“ˆ History"])

    with tab1:
        st.header("Upload Your Leads CSV")

        # File upload
        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type="csv",
            help="CSV should contain columns: name, email, company, linkedin_url"
        )

        if uploaded_file is not None:
            try:
                # Read CSV
                df = pd.read_csv(uploaded_file)

                # Validate columns: all four are required downstream by
                # process_leads and save_to_database.
                required_columns = ['name', 'email', 'company', 'linkedin_url']
                missing_columns = [col for col in required_columns if col not in df.columns]

                if missing_columns:
                    st.error(f"Missing required columns: {', '.join(missing_columns)}")
                    st.info("Required columns: name, email, company, linkedin_url")
                else:
                    st.success(f"βœ… CSV loaded successfully! Found {len(df)} leads")
                    st.dataframe(df.head())

                    # Process data button
                    if st.button("πŸš€ Start Processing", type="primary"):
                        process_leads(df, scrape_timeout, use_selenium, custom_model_path, tone, temperature, generate_variations)

            except Exception as e:
                st.error(f"Error reading CSV: {str(e)}")

    with tab2:
        st.header("Processing Results")

        if st.session_state.processed_data is not None:
            df = st.session_state.processed_data

            # Display results
            st.success(f"βœ… Processed {len(df)} leads successfully!")

            # Show detailed results: one expander per processed lead.
            for idx, row in df.iterrows():
                with st.expander(f"πŸ“‹ {row['name']} - {row['company']} {'🎯' if row.get('tone_used') else ''}"):
                    col1, col2, col3 = st.columns([2, 3, 1])

                    with col1:
                        st.subheader("πŸ“Š Scraped Information")
                        st.text_area("Company Info", row.get('scraped_info', 'No info scraped'), height=100, key=f"info_{idx}")

                        # Show generation settings if available
                        if row.get('tone_used'):
                            st.write(f"**Tone:** {row.get('tone_used', 'N/A')}")
                            st.write(f"**Temperature:** {row.get('temperature_used', 'N/A')}")

                    with col2:
                        st.subheader("πŸ“§ Generated Email")
                        subject = row.get('generated_subject', 'No subject generated')
                        email_body = row.get('generated_email', 'No email generated')

                        st.text_area("Subject", subject, height=50, key=f"subject_{idx}")
                        st.text_area("Email Body", email_body, height=250, key=f"email_{idx}")

                    with col3:
                        st.subheader("πŸ“ˆ Quality")
                        if subject and email_body:
                            subject_len = len(subject)
                            # Get main body without variations (process_leads
                            # appends them after this marker string).
                            main_body = email_body.split('--- VARIATIONS ---')[0].strip()
                            body_words = len(main_body.split())

                            # Quality indicators
                            if 15 <= subject_len <= 65:
                                st.success(f"βœ… Subject: {subject_len} chars")
                            else:
                                st.warning(f"⚠️ Subject: {subject_len} chars")

                            if 25 <= body_words <= 100:
                                st.success(f"βœ… Body: {body_words} words")
                            else:
                                st.warning(f"⚠️ Body: {body_words} words")

                            # Check for placeholders ('{' catches unexpanded
                            # template braces as well as literal brackets)
                            if '[Your Name]' in email_body or '{' in email_body:
                                st.error("❌ Contains placeholders")
                            else:
                                st.success("βœ… No placeholders")

                            # Check for personalization
                            if row['name'] in main_body and row['company'] in main_body:
                                st.success("βœ… Well personalized")
                            else:
                                st.warning("⚠️ Low personalization")

                            # Check for CTA
                            cta_words = ['call', 'conversation', 'chat', 'discuss', 'talk', 'meeting']
                            if any(word in main_body.lower() for word in cta_words):
                                st.success("βœ… Has call-to-action")
                            else:
                                st.warning("⚠️ Weak call-to-action")

                            # Overall quality score: same five checks as
                            # above, weighted to a 0-100 percentage.
                            quality_score = 0
                            if 15 <= subject_len <= 65: quality_score += 20
                            if 25 <= body_words <= 100: quality_score += 25
                            if '[Your Name]' not in email_body: quality_score += 25
                            if row['name'] in main_body and row['company'] in main_body: quality_score += 20
                            if any(word in main_body.lower() for word in cta_words): quality_score += 10

                            if quality_score >= 80:
                                st.success(f"πŸ† Overall: {quality_score}% - Ready to send!")
                            elif quality_score >= 60:
                                st.warning(f"πŸ“ Overall: {quality_score}% - Needs polish")
                            else:
                                st.error(f"πŸ”§ Overall: {quality_score}% - Needs work")

                    # Quick copy button (read-only text area; Streamlit has
                    # no native clipboard widget)
                    email_text = f"Subject: {subject}\n\n{email_body}"
                    st.text_area("Copy Email", email_text, height=100, key=f"copy_{idx}")

            # Export functionality
            if st.button("πŸ“₯ Export to CSV"):
                csv_data = df.to_csv(index=False)
                st.download_button(
                    label="⬇️ Download CSV",
                    data=csv_data,
                    file_name=f"cold_emails_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime="text/csv"
                )
        else:
            st.info("πŸ‘† Upload and process a CSV file to see results here")

    with tab3:
        st.header("Processing History")

        # Load and display historical data from the SQLite cache.
        try:
            history_df = load_from_database()
            if not history_df.empty:
                st.dataframe(history_df)

                # Export history
                if st.button("πŸ“₯ Export History"):
                    csv_data = history_df.to_csv(index=False)
                    st.download_button(
                        label="⬇️ Download History CSV",
                        data=csv_data,
                        file_name=f"email_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                        mime="text/csv"
                    )
            else:
                st.info("No historical data found")
        except Exception as e:
            st.error(f"Error loading history: {str(e)}")
281
+
282
def process_leads(df, scrape_timeout, use_selenium, custom_model_path, tone, temperature, generate_variations):
    """Scrape and generate a personalized email for every lead in *df*.

    Results are stored in ``st.session_state.processed_data`` (DataFrame)
    and persisted via ``save_to_database``. Progress is reported through a
    Streamlit progress bar and status line.

    Args:
        df: Leads DataFrame with ``name``, ``email``, ``company`` and
            ``linkedin_url`` columns.
        scrape_timeout: Per-page scrape timeout in seconds.
        use_selenium: Use Selenium-backed scraping instead of plain HTTP.
        custom_model_path: Optional local model path forwarded to
            ``EmailGenerator``; ``None`` keeps its default.
        tone: Email tone label forwarded to the generator.
        temperature: Sampling temperature for single-email generation.
        generate_variations: When True, generate 3 variations per lead and
            append them to the primary email body.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # Initialize components
        status_text.text("πŸ”§ Initializing scraper...")
        scraper = LinkedInScraper(timeout=scrape_timeout, use_selenium=use_selenium)

        status_text.text("πŸ€– Initializing AI model...")
        # The generator is cached in session state so the model is only
        # loaded once across Streamlit reruns.
        if st.session_state.email_generator is None:
            st.session_state.email_generator = EmailGenerator(custom_model_path)

        email_gen = st.session_state.email_generator

        # Process each lead
        processed_data = []
        total_leads = len(df)

        # BUGFIX: iterrows() yields the DataFrame *index label*, which is
        # not guaranteed to be 0..n-1 (e.g. a filtered/reordered upload).
        # Using it as a position broke the progress bar and the "(i/n)"
        # status; enumerate() gives a true 1-based position.
        for pos, (_, row) in enumerate(df.iterrows(), start=1):
            status_text.text(f"πŸ” Processing {row['name']} ({pos}/{total_leads})")

            # Scrape information
            scraped_info = scraper.scrape_linkedin_or_company(
                row['linkedin_url'],
                row['company']
            )

            # Generate email with new parameters
            status_text.text(f"✍️ Generating email for {row['name']} ({tone} tone)...")

            if generate_variations:
                # Generate multiple variations
                variations = email_gen.generate_multiple_variations(
                    row['name'],
                    row['company'],
                    scraped_info,
                    num_variations=3,
                    tone=tone
                )

                # Use the first variation as primary
                subject = variations[0]['subject']
                email_body = variations[0]['email_body']

                # Store all variations in a formatted way; the results tab
                # strips everything after this marker for quality checks.
                variations_text = "\n\n--- VARIATIONS ---\n"
                for i, var in enumerate(variations, 1):
                    variations_text += f"\nVariation {i} ({var['tone']}):\n"
                    variations_text += f"Subject: {var['subject']}\n"
                    variations_text += f"Body: {var['email_body']}\n"

                email_body += variations_text

            else:
                # Generate single email with specified parameters
                subject, email_body = email_gen.generate_email(
                    row['name'],
                    row['company'],
                    scraped_info,
                    tone=tone,
                    temperature=temperature
                )

            # Add to processed data
            processed_data.append({
                'name': row['name'],
                'email': row['email'],
                'company': row['company'],
                'linkedin_url': row['linkedin_url'],
                'scraped_info': scraped_info,
                'generated_subject': subject,
                'generated_email': email_body,
                'tone_used': tone,
                'temperature_used': temperature
            })

            # Update progress (pos is already 1-based)
            progress_bar.progress(pos / total_leads)

        # Convert to DataFrame and save
        result_df = pd.DataFrame(processed_data)
        st.session_state.processed_data = result_df

        # Save to database
        save_to_database(result_df)

        status_text.text("βœ… Processing completed!")
        st.success("πŸŽ‰ All leads processed successfully!")

        # Show quality metrics; guard the empty-upload case where .mean()
        # on an empty Series is NaN and the message would read "nan".
        if not result_df.empty:
            avg_subject_length = result_df['generated_subject'].str.len().mean()
            avg_body_length = result_df['generated_email'].str.split().str.len().mean()

            st.info(f"πŸ“Š Quality Metrics: Avg subject length: {avg_subject_length:.0f} chars, Avg body length: {avg_body_length:.0f} words")

    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary; surface the
        # failure to the user instead of crashing the Streamlit script.
        st.error(f"❌ Error during processing: {str(e)}")
        status_text.text("❌ Processing failed")
382
+
383
# Script entry point: render the Streamlit app.
if __name__ == "__main__":
    main()