Spaces:
Running
on
A10G
Running
on
A10G
Commit
·
fa826da
1
Parent(s):
f98590c
refactor markov_1 to simple_1
Browse files
- .gitignore +1 -0
- app.py +1 -1
- demo_watermark.py +1 -1
- watermark_processor.py +4 -4
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
app.py
CHANGED
|
@@ -31,7 +31,7 @@ arg_dict = {
|
|
| 31 |
'n_beams': 1,
|
| 32 |
'sampling_temp': 0.7,
|
| 33 |
'use_gpu': True,
|
| 34 |
-
'seeding_scheme': 'markov_1',
|
| 35 |
'gamma': 0.25,
|
| 36 |
'delta': 2.0,
|
| 37 |
'normalizers': '',
|
|
|
|
| 31 |
'n_beams': 1,
|
| 32 |
'sampling_temp': 0.7,
|
| 33 |
'use_gpu': True,
|
| 34 |
+
'seeding_scheme': 'simple_1',
|
| 35 |
'gamma': 0.25,
|
| 36 |
'delta': 2.0,
|
| 37 |
'normalizers': '',
|
demo_watermark.py
CHANGED
|
@@ -109,7 +109,7 @@ def parse_args():
|
|
| 109 |
parser.add_argument(
|
| 110 |
"--seeding_scheme",
|
| 111 |
type=str,
|
| 112 |
-
default="markov_1",
|
| 113 |
help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
|
| 114 |
)
|
| 115 |
parser.add_argument(
|
|
|
|
| 109 |
parser.add_argument(
|
| 110 |
"--seeding_scheme",
|
| 111 |
type=str,
|
| 112 |
+
default="simple_1",
|
| 113 |
help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
|
| 114 |
)
|
| 115 |
parser.add_argument(
|
watermark_processor.py
CHANGED
|
@@ -35,7 +35,7 @@ class WatermarkBase:
|
|
| 35 |
vocab: list[int] = None,
|
| 36 |
gamma: float = 0.5,
|
| 37 |
delta: float = 2.0,
|
| 38 |
-
seeding_scheme: str = "markov_1",
|
| 39 |
hash_key: int = 15485863, # just a large prime number to create a rng seed with sufficient bit width
|
| 40 |
select_green_tokens: bool = True,
|
| 41 |
):
|
|
@@ -56,7 +56,7 @@ class WatermarkBase:
|
|
| 56 |
if seeding_scheme is None:
|
| 57 |
seeding_scheme = self.seeding_scheme
|
| 58 |
|
| 59 |
-
if seeding_scheme == "markov_1":
|
| 60 |
assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
|
| 61 |
prev_token = input_ids[-1].item()
|
| 62 |
self.rng.manual_seed(self.hash_key * prev_token)
|
|
@@ -138,7 +138,7 @@ class WatermarkDetector(WatermarkBase):
|
|
| 138 |
self.z_threshold = z_threshold
|
| 139 |
self.rng = torch.Generator(device=self.device)
|
| 140 |
|
| 141 |
-
if self.seeding_scheme == "markov_1":
|
| 142 |
self.min_prefix_len = 1
|
| 143 |
else:
|
| 144 |
raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
|
|
@@ -149,7 +149,7 @@ class WatermarkDetector(WatermarkBase):
|
|
| 149 |
|
| 150 |
self.ignore_repeated_bigrams = ignore_repeated_bigrams
|
| 151 |
if self.ignore_repeated_bigrams:
|
| 152 |
-
assert self.seeding_scheme == "markov_1", "No repeated bigram credit variant assumes the single token seeding scheme."
|
| 153 |
|
| 154 |
|
| 155 |
def _compute_z_score(self, observed_count, T):
|
|
|
|
| 35 |
vocab: list[int] = None,
|
| 36 |
gamma: float = 0.5,
|
| 37 |
delta: float = 2.0,
|
| 38 |
+
seeding_scheme: str = "simple_1", # mostly unused/always default
|
| 39 |
hash_key: int = 15485863, # just a large prime number to create a rng seed with sufficient bit width
|
| 40 |
select_green_tokens: bool = True,
|
| 41 |
):
|
|
|
|
| 56 |
if seeding_scheme is None:
|
| 57 |
seeding_scheme = self.seeding_scheme
|
| 58 |
|
| 59 |
+
if seeding_scheme == "simple_1":
|
| 60 |
assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
|
| 61 |
prev_token = input_ids[-1].item()
|
| 62 |
self.rng.manual_seed(self.hash_key * prev_token)
|
|
|
|
| 138 |
self.z_threshold = z_threshold
|
| 139 |
self.rng = torch.Generator(device=self.device)
|
| 140 |
|
| 141 |
+
if self.seeding_scheme == "simple_1":
|
| 142 |
self.min_prefix_len = 1
|
| 143 |
else:
|
| 144 |
raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
|
|
|
|
| 149 |
|
| 150 |
self.ignore_repeated_bigrams = ignore_repeated_bigrams
|
| 151 |
if self.ignore_repeated_bigrams:
|
| 152 |
+
assert self.seeding_scheme == "simple_1", "No repeated bigram credit variant assumes the single token seeding scheme."
|
| 153 |
|
| 154 |
|
| 155 |
def _compute_z_score(self, observed_count, T):
|