selfconstruct3d committed on
Commit
c791818
·
verified ·
1 Parent(s): a967b13

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -28
README.md CHANGED
@@ -26,39 +26,28 @@ The model was trained on a multilingual dataset of cybersecurity and non-cyberse
26
 
27
  ```python
28
  from sentence_transformers import SentenceTransformer
29
- from sklearn.model_selection import train_test_split
30
- from sklearn.preprocessing import LabelEncoder
31
- import pandas as pd
32
- import joblib
33
  from huggingface_hub import hf_hub_download
 
34
 
35
- # Load your cleaned dataset
36
- df = pd.read_csv("your_dataset.csv") # Requires 'clean_text' and 'label' columns
37
-
38
- # Load the sentence transformer
39
  embedder = SentenceTransformer("intfloat/multilingual-e5-large")
40
 
41
- # Train-test split
42
- X_train, X_test, y_train, y_test = train_test_split(
43
- df["clean_text"],
44
- df["label"],
45
- test_size=0.05,
46
- stratify=df["label"],
47
- random_state=42
48
- )
49
 
50
- # Encode labels
51
- label_encoder = LabelEncoder()
52
- y_train_enc = label_encoder.fit_transform(y_train)
53
- y_test_enc = label_encoder.transform(y_test)
 
54
 
55
- # Generate sentence embeddings
56
- X_train_emb = embedder.encode(X_train.tolist(), convert_to_numpy=True, show_progress_bar=True)
57
- X_test_emb = embedder.encode(X_test.tolist(), convert_to_numpy=True, show_progress_bar=True)
58
 
59
- # Load the trained classifier
60
- model_path = hf_hub_download(repo_id="selfconstruct3d/cybersec_classifier", filename="cybersec_classifier.pkl")
61
- model = joblib.load(model_path)
62
 
63
- # Predict
64
- y_pred = model.predict(X_test_emb)
 
 
26
 
27
  ```python
28
  from sentence_transformers import SentenceTransformer
 
 
 
 
29
  from huggingface_hub import hf_hub_download
30
+ import joblib
31
 
32
+ # 1. Load the embedding model
 
 
 
33
  embedder = SentenceTransformer("intfloat/multilingual-e5-large")
34
 
35
+ # 2. Load the pretrained MLP classifier from Hugging Face Hub
36
+ model_path = hf_hub_download(repo_id="selfconstruct3d/cybersec_classifier", filename="cybersec_classifier.pkl")
37
+ model = joblib.load(model_path)
 
 
 
 
 
38
 
39
+ # 3. Example input texts (can be in English or German)
40
+ texts = [
41
+ "A new ransomware attack has affected critical infrastructure in Germany.",
42
+ "The local sports club hosted its annual summer festival this weekend."
43
+ ]
44
 
45
+ # 4. Generate embeddings
46
+ embeddings = embedder.encode(texts, convert_to_numpy=True, show_progress_bar=False)
 
47
 
48
+ # 5. Predict cybersecurity relevance
49
+ predictions = model.predict(embeddings)
 
50
 
51
+ # 6. Output results
52
+ for text, label in zip(texts, predictions):
53
+ print(f"Text: {text}\nPrediction: {label}\n")