Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
__init__.py
CHANGED
@@ -1,9 +1 @@
|
|
1 |
-
#
|
2 |
-
from .masking import (
|
3 |
-
mask_email, mask_name, mask_date,
|
4 |
-
mask_ssn, mask_itin, mask_phone,
|
5 |
-
mask_email_udf, mask_name_udf, mask_date_udf,
|
6 |
-
mask_ssn_udf, mask_itin_udf, mask_phone_udf
|
7 |
-
)
|
8 |
-
|
9 |
-
from .utils import apply_masking
|
|
|
1 |
+
# Init for tests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (135 Bytes). View file
|
|
__pycache__/test_masking.cpython-311-pytest-8.4.1.pyc
ADDED
Binary file (13.8 kB). View file
|
|
__pycache__/test_schema_masking.cpython-311-pytest-8.4.1.pyc
ADDED
Binary file (4.46 kB). View file
|
|
test_masking.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from anonyspark.masking import (
|
2 |
+
mask_email, mask_name, mask_date,
|
3 |
+
mask_ssn, mask_itin, mask_phone
|
4 |
+
)
|
5 |
+
|
6 |
+
def test_mask_email():
    """Check mask_email on a regular address and on empty/missing input.

    The domain is expected to stay visible while the local part is replaced
    by ``***``; both ``""`` and ``None`` are expected to produce ``None``.
    """
    # NOTE(review): the previous input literal appears to be an
    # e-mail-obfuscation placeholder (it contained no "@"), so it could not
    # yield the expected "***@example.com". The local part below is a
    # stand-in -- presumably any local part masks to "***"; confirm against
    # the original test data.
    assert mask_email("john@example.com") == "***@example.com"
    for blank in ("", None):
        assert mask_email(blank) is None
|
10 |
+
|
11 |
+
def test_mask_name():
    """Check mask_name on a plain first name and on empty/missing input.

    "John" is expected to become "J***"; ``""`` and ``None`` are both
    expected to produce ``None``.
    """
    masked = mask_name("John")
    assert masked == "J***"
    for blank in ("", None):
        assert mask_name(blank) is None
|
15 |
+
|
16 |
+
def test_mask_date():
    """Check mask_date on a dashed date string and on unusable input.

    "1991-08-14" is expected to become "***-**-14"; the string "invalid"
    and ``None`` are both expected to produce ``None``.
    """
    masked = mask_date("1991-08-14")
    assert masked == "***-**-14"
    for rejected in ("invalid", None):
        assert mask_date(rejected) is None
|
20 |
+
|
21 |
+
def test_mask_ssn():
    """Check mask_ssn keeps only the last four digits and rejects junk input."""
    masked = mask_ssn("123-45-6789")
    assert masked == "***-**-6789"

    rejected = mask_ssn("invalid")
    assert rejected is None
|
24 |
+
|
25 |
+
def test_mask_itin():
    """Check mask_itin on one accepted and one rejected identifier.

    "912-73-1234" is expected to become "***-**-1234", while "123-45-6789"
    is expected to produce ``None``.
    """
    accepted = mask_itin("912-73-1234")
    assert accepted == "***-**-1234"

    rejected = mask_itin("123-45-6789")
    assert rejected is None
|
28 |
+
|
29 |
+
def test_mask_phone():
    """Check mask_phone on two phone formats and on junk input.

    Both the dashed and the parenthesised spelling of the same number are
    expected to become "***-***-7890"; "invalid" is expected to produce
    ``None``.
    """
    for raw in ("123-456-7890", "(123) 456-7890"):
        assert mask_phone(raw) == "***-***-7890"
    assert mask_phone("invalid") is None
|
test_schema_masking.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tests/test_schema_masking.py
|
2 |
+
|
3 |
+
import sys
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Extend the import search path before the `anonyspark` import below.
# NOTE(review): this absolute path looks environment-specific (it will not
# exist on most machines) -- confirm, or derive it from this file's location.
sys.path.append("/content/anonyspark")
|
7 |
+
|
8 |
+
from pyspark.sql import SparkSession
|
9 |
+
from anonyspark.utils import apply_masking
|
10 |
+
|
11 |
+
def test_schema_masking():
    """Check that apply_masking yields the expected ``masked_<column>`` values.

    Builds a one-row DataFrame on a ``local[1]`` Spark session, passes it to
    ``apply_masking`` together with a schema dict, collects the first row and
    compares each ``masked_*`` field with its expected masked form. The
    session is stopped afterwards, whether or not an assertion fails.
    """
    spark = SparkSession.builder.master("local[1]").appName("Test").getOrCreate()
    try:
        df = spark.createDataFrame([{
            # NOTE(review): the previous e-mail literal appears to be an
            # obfuscation placeholder without an "@"; the local part here is
            # a stand-in chosen to match the expected "***@example.com".
            "email": "john@example.com",
            "name": "John",
            "dob": "1991-08-14",
            "ssn": "123-45-6789",
            "itin": "912-73-1234",
            "phone": "123-456-7890",
        }])

        # Presumably column name -> masking type; every key maps to the
        # identically named value here -- verify against apply_masking.
        schema = {
            "email": "email",
            "name": "name",
            "dob": "dob",
            "ssn": "ssn",
            "itin": "itin",
            "phone": "phone",
        }

        masked_df = apply_masking(df, schema)
        result = masked_df.collect()[0].asDict()

        assert result["masked_email"] == "***@example.com"
        assert result["masked_name"] == "J***"
        assert result["masked_dob"] == "***-**-14"
        assert result["masked_ssn"] == "***-**-6789"
        assert result["masked_itin"] == "***-**-1234"
        assert result["masked_phone"] == "***-***-7890"
    finally:
        # Release the session even when an assertion above fails.
        spark.stop()
|