Spaces:
Sleeping
Sleeping
Delete train_model.py
Browse files- train_model.py +0 -60
train_model.py
DELETED
@@ -1,60 +0,0 @@
|
|
1 |
-
# -*- coding: utf-8 -*-
|
2 |
-
"""train_model.ipynb
|
3 |
-
|
4 |
-
Automatically generated by Colab.
|
5 |
-
|
6 |
-
Original file is located at
|
7 |
-
https://colab.research.google.com/drive/1R-i5F70PPmj1iBbLI_xkYtDWxwgf9YEw
|
8 |
-
"""
|
9 |
-
|
10 |
-
import numpy as np
|
11 |
-
import pandas as pd
|
12 |
-
import tensorflow as tf
|
13 |
-
from sklearn.model_selection import train_test_split
|
14 |
-
from tensorflow.keras.preprocessing.text import Tokenizer
|
15 |
-
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
16 |
-
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, Input
|
17 |
-
from tensorflow.keras.models import Model
|
18 |
-
from tensorflow.keras import regularizers
|
19 |
-
from tensorflow.keras.callbacks import EarlyStopping
|
20 |
-
from tensorflow.keras.optimizers import Adam
|
21 |
-
|
22 |
-
# Load and preprocess data
|
23 |
-
data = pd.read_csv("train prompt.csv", sep=',', quoting=3, encoding='ISO-8859-1', on_bad_lines='skip', engine='python')
|
24 |
-
data['label'] = data['label'].replace({'valid': 0, 'malicious': 1})
|
25 |
-
|
26 |
-
X = data['input'].values
|
27 |
-
y = data['label'].values
|
28 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
29 |
-
|
30 |
-
# Tokenizer and padding
|
31 |
-
tokenizer = Tokenizer(num_words=5000)
|
32 |
-
tokenizer.fit_on_texts(X_train)
|
33 |
-
|
34 |
-
X_train_seq = tokenizer.texts_to_sequences(X_train)
|
35 |
-
X_test_seq = tokenizer.texts_to_sequences(X_test)
|
36 |
-
|
37 |
-
max_length = 100
|
38 |
-
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length)
|
39 |
-
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length)
|
40 |
-
|
41 |
-
# Model definition
|
42 |
-
input_layer = Input(shape=(max_length,))
|
43 |
-
embedding_layer = Embedding(input_dim=5000, output_dim=128, input_length=max_length)(input_layer)
|
44 |
-
x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(embedding_layer)
|
45 |
-
x = Dropout(0.3)(x)
|
46 |
-
x = Bidirectional(LSTM(64, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(x)
|
47 |
-
malicious_output = Dense(1, activation='sigmoid')(x)
|
48 |
-
|
49 |
-
model = Model(inputs=input_layer, outputs=malicious_output)
|
50 |
-
optimizer = Adam(learning_rate=0.0001)
|
51 |
-
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
|
52 |
-
|
53 |
-
# Training the model
|
54 |
-
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
|
55 |
-
model.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test), callbacks=[early_stopping])
|
56 |
-
|
57 |
-
# Save the trained model
|
58 |
-
model.save("deep_learning_model.h5")
|
59 |
-
print("Model saved to deep_learning_model.h5")
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|