MusIre committed on
Commit
77d72f5
·
verified ·
1 Parent(s): c1b1c71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -66
app.py CHANGED
@@ -4,72 +4,6 @@ import matplotlib.pyplot as plt
4
  import seaborn as sns
5
  import plotly.express as px
6
 
7
- num_rows = 20000
8
- df = pd.read_csv('emails.csv', on_bad_lines='skip', nrows=num_rows)
9
-
10
- def get_message(Series: pd.Series):
11
- result = pd.Series(index=Series.index)
12
- for row, message in enumerate(Series):
13
- message_words = message.split('\n')
14
- del message_words[:15]
15
- result.iloc[row] = ''.join(message_words).strip()
16
- return result
17
-
18
- def get_date(Series: pd.Series):
19
- result = pd.Series(index=Series.index)
20
- for row, message in enumerate(Series):
21
- message_words = message.split('\n')
22
- del message_words[0]
23
- del message_words[1:]
24
- result.iloc[row] = ''.join(message_words).strip()
25
- result.iloc[row] = result.iloc[row].replace('Date: ', '')
26
- print('Done parsing, converting to datetime format..')
27
- return pd.to_datetime(result)
28
-
29
- def get_sender_and_receiver(Series: pd.Series):
30
- sender = pd.Series(index = Series.index)
31
- recipient1 = pd.Series(index = Series.index)
32
- recipient2 = pd.Series(index = Series.index)
33
- recipient3 = pd.Series(index = Series.index)
34
-
35
- for row,message in enumerate(Series):
36
- message_words = message.split('\n')
37
- sender[row] = message_words[2].replace('From: ', '')
38
- recipient1[row] = message_words[3].replace('To: ', '')
39
- recipient2[row] = message_words[10].replace('X-cc: ', '')
40
- recipient3[row] = message_words[11].replace('X-bcc: ', '')
41
-
42
- return sender, recipient1, recipient2, recipient3
43
-
44
- def get_subject(Series: pd.Series):
45
- result = pd.Series(index = Series.index)
46
-
47
- for row, message in enumerate(Series):
48
- message_words = message.split('\n')
49
- message_words = message_words[4]
50
- result[row] = message_words.replace('Subject: ', '')
51
- return result
52
-
53
- def get_folder(Series: pd.Series):
54
- result = pd.Series(index = Series.index)
55
-
56
- for row, message in enumerate(Series):
57
- message_words = message.split('\n')
58
- message_words = message_words[12]
59
- result[row] = message_words.replace('X-Folder: ', '')
60
- return result
61
-
62
- df['text'] = get_message(df.message)
63
- df['sender'], df['recipient1'], df['recipient2'], df['recipient3'] = get_sender_and_receiver(df.message)
64
- df['Subject'] = get_subject(df.message)
65
- df['folder'] = get_folder(df.message)
66
- df['date'] = get_date(df.message)
67
-
68
- df = df.drop(['message', 'file'], axis = 1)
69
-
70
- df.head(100)
71
-
72
-
73
  import chromadb
74
  chroma_client = chromadb.Client()
75
 
 
4
  import seaborn as sns
5
  import plotly.express as px
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import chromadb
8
  chroma_client = chromadb.Client()
9