deepugaur committed
Commit b6046df · verified · 1 Parent(s): 7ca4fc3

Delete train_model.py

Files changed (1):
  train_model.py (+0 -60)
train_model.py DELETED
@@ -1,60 +0,0 @@
-# -*- coding: utf-8 -*-
-"""train_model.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1R-i5F70PPmj1iBbLI_xkYtDWxwgf9YEw
-"""
-
-import numpy as np
-import pandas as pd
-import tensorflow as tf
-from sklearn.model_selection import train_test_split
-from tensorflow.keras.preprocessing.text import Tokenizer
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, Input
-from tensorflow.keras.models import Model
-from tensorflow.keras import regularizers
-from tensorflow.keras.callbacks import EarlyStopping
-from tensorflow.keras.optimizers import Adam
-
-# Load and preprocess data
-data = pd.read_csv("train prompt.csv", sep=',', quoting=3, encoding='ISO-8859-1', on_bad_lines='skip', engine='python')
-data['label'] = data['label'].replace({'valid': 0, 'malicious': 1})
-
-X = data['input'].values
-y = data['label'].values
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
-# Tokenizer and padding
-tokenizer = Tokenizer(num_words=5000)
-tokenizer.fit_on_texts(X_train)
-
-X_train_seq = tokenizer.texts_to_sequences(X_train)
-X_test_seq = tokenizer.texts_to_sequences(X_test)
-
-max_length = 100
-X_train_pad = pad_sequences(X_train_seq, maxlen=max_length)
-X_test_pad = pad_sequences(X_test_seq, maxlen=max_length)
-
-# Model definition
-input_layer = Input(shape=(max_length,))
-embedding_layer = Embedding(input_dim=5000, output_dim=128, input_length=max_length)(input_layer)
-x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(embedding_layer)
-x = Dropout(0.3)(x)
-x = Bidirectional(LSTM(64, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(x)
-malicious_output = Dense(1, activation='sigmoid')(x)
-
-model = Model(inputs=input_layer, outputs=malicious_output)
-optimizer = Adam(learning_rate=0.0001)
-model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
-
-# Training the model
-early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
-model.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test), callbacks=[early_stopping])
-
-# Save the trained model
-model.save("deep_learning_model.h5")
-print("Model saved to deep_learning_model.h5")
-