Spaces:
Sleeping
Sleeping
# Train a bidirectional-LSTM binary classifier that scores a text prompt as
# valid (0) or malicious (1), then save the fitted model to an HDF5 file.

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Load and preprocess data
# quoting=3 is csv.QUOTE_NONE (quote characters are kept as literal text);
# on_bad_lines='skip' silently drops lines the parser rejects.
data = pd.read_csv("train prompt.csv", sep=',', quoting=3, encoding='ISO-8859-1', on_bad_lines='skip', engine='python')

# Encode the textual labels explicitly instead of relying on Series.replace():
# replace() leaves unknown labels in place and newer pandas no longer
# downcasts the resulting object column, so Keras would receive an object
# array it cannot convert to a tensor. Anything that is not a known label
# (or already numeric 0/1) becomes NaN and the row is dropped.
label_ids = {'valid': 0, 'malicious': 1}
encoded_labels = data['label'].map(lambda raw: label_ids.get(raw, raw))
data['label'] = pd.to_numeric(encoded_labels, errors='coerce')
data = data.dropna(subset=['input', 'label'])
data = data[data['label'].isin(list(label_ids.values()))]
if data.empty:
    raise ValueError("No rows with a usable 'input' and a 'valid'/'malicious' label were found")

# astype(str) guarantees the tokenizer only ever sees strings.
X = data['input'].astype(str).to_numpy()
y = data['label'].astype('int64').to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenizer and padding
# vocab_size is shared by the tokenizer and the embedding layer so the two
# can never drift apart.
vocab_size = 5000
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train)  # vocabulary is built from the training split only
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
max_length = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length)

# Model definition
input_layer = Input(shape=(max_length,))
# The sequence length is already fixed by input_layer; the deprecated
# input_length argument is therefore not passed.
embedding_layer = Embedding(input_dim=vocab_size, output_dim=128)(input_layer)
x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(embedding_layer)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(64, dropout=0.2, kernel_regularizer=regularizers.l2(0.01)))(x)
# Single sigmoid unit: the output is the predicted probability of label 1.
malicious_output = Dense(1, activation='sigmoid')(x)
model = Model(inputs=input_layer, outputs=malicious_output)
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Training the model
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# NOTE(review): the held-out test split doubles as the validation set that
# drives early stopping, so no untouched data remains for a final evaluation.
model.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test), callbacks=[early_stopping])

# Save the trained model
# NOTE(review): only the model is persisted here; the fitted tokenizer is not
# written out by this script -- confirm inference code can rebuild it.
model.save("deep_learning_model.h5")
print("Model saved to deep_learning_model.h5")