deepugaur committed on
Commit
8fe6c04
·
verified ·
1 Parent(s): cb24850

Create train_model.py

Browse files
Files changed (1) hide show
  1. train_model.py +52 -0
train_model.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train a bidirectional-LSTM binary classifier on labelled prompts.

Reads ``train prompt.csv`` (columns ``input`` and ``label``), fits a word-level
tokenizer on the training split, trains the model, and writes both the model
(``deep_learning_model.h5``) and the fitted tokenizer (``tokenizer.json``) so
that inference code can reproduce the exact same text -> sequence mapping.
"""

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Single source of truth for values that must agree between the tokenizer,
# the padding step and the model's input/embedding layers.
VOCAB_SIZE = 5000
LABEL_IDS = {'valid': 0, 'malicious': 1}

# Load and preprocess data
# NOTE(review): quoting=3 is csv.QUOTE_NONE, so quote characters are treated as
# plain text; a prompt that contains a comma then produces extra fields and the
# row is discarded by on_bad_lines='skip'. Confirm this data loss is intended.
data = pd.read_csv("train prompt.csv", sep=',', quoting=3, encoding='ISO-8859-1', on_bad_lines='skip', engine='python')

# Map the textual labels to 0/1, keep labels that are already numeric, and turn
# anything else into NaN so it can be filtered out instead of crashing fit().
data['label'] = pd.to_numeric(
    data['label'].map(lambda value: LABEL_IDS.get(value, value)),
    errors='coerce',
)
data = data[data['label'].isin([0, 1])]
# Empty prompt cells are read as NaN (a float), which the tokenizer cannot
# lower-case; drop them and force the remaining prompts to str.
data = data.dropna(subset=['input'])
if data.empty:
    raise ValueError("No usable rows with an 'input' and a valid/malicious 'label' were found in 'train prompt.csv'")

X = data['input'].astype(str).values
y = data['label'].astype('int32').values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenizer and padding
# The tokenizer is fitted on the training split only, so the held-out split
# does not influence the vocabulary.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

max_length = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length)

# Model definition
input_layer = Input(shape=(max_length,))
# The sequence length is already fixed by Input(shape=...); the Embedding
# `input_length` argument is deprecated in Keras 3 and is therefore omitted.
embedding_layer = Embedding(input_dim=VOCAB_SIZE, output_dim=128)(input_layer)
x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(embedding_layer)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(64, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(x)
# Single sigmoid unit: output is the predicted probability of label 1 ('malicious').
malicious_output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=input_layer, outputs=malicious_output)
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Training the model
# NOTE(review): the held-out split doubles as validation data for early
# stopping, so metrics reported on it are not an unbiased test estimate.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test), callbacks=[early_stopping])

# Save the trained model
model.save("deep_learning_model.h5")
print("Model saved to deep_learning_model.h5")

# Save the fitted tokenizer as well: without its word index the saved model
# cannot be fed correctly encoded text at inference time.
with open("tokenizer.json", "w", encoding="utf-8") as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())
print("Tokenizer saved to tokenizer.json")