Commit a7d76f1 (1 parent: a230391): Added a welcome, paper tldr tab

Files changed: app.py (+1 −1) · demo_watermark.py (+90 −45)
app.py CHANGED

@@ -35,7 +35,7 @@ arg_dict = {
     'sampling_temp': 0.7,
     'use_gpu': True,
     'seeding_scheme': 'simple_1',
-    'gamma': 0.
+    'gamma': 0.5,
     'delta': 2.0,
     'normalizers': '',
     'ignore_repeated_bigrams': False,
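For context, `gamma` is the fraction of the vocabulary placed on the "greenlist" at each generation step, and `delta` is the logit bias added to greenlist tokens. Below is a minimal sketch of how these two settings are consumed during generation, assuming the `WatermarkLogitsProcessor` from the linked jwkirchenbauer/lm-watermarking repo (usage per its README); the model checkpoint is a placeholder, not necessarily the one this Space serves:

```python
# Sketch: how `gamma` and `delta` feed the watermark at generation time.
# Assumes the lm-watermarking repo's watermark_processor module; the model
# name is a placeholder checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList
from watermark_processor import WatermarkLogitsProcessor  # jwkirchenbauer/lm-watermarking

model_name = "facebook/opt-1.3b"  # placeholder, may differ from the Space's model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

watermark_processor = WatermarkLogitsProcessor(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.5,                  # greenlist fraction; this commit's new default
    delta=2.0,                  # logit boost applied to greenlist tokens
    seeding_scheme="simple_1",  # seed each greenlist from the previous token
)

inputs = tokenizer("The diamondback terrapin is a species of", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=100,
    logits_processor=LogitsProcessorList([watermark_processor]),
)
print(tokenizer.decode(output_ids[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```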
demo_watermark.py CHANGED

@@ -343,49 +343,63 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
         [](https://github.com/jwkirchenbauer/lm-watermarking)
         """
         )
-
-        # pass
-        #  # buggy
-
-        with gr.Accordion("Understanding the output metrics",open=False):
-            gr.Markdown(
-            """
-            - `z-score threshold` : The cutoff for the hypothesis test
-            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
-                The first token is omitted in the simple, single token seeding scheme since there is no way to generate
-                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
-                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
-            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
-            - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
-            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
-                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked.
-            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
-                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
-                If this is extremely _small_ we are confident that this many green tokens were not chosen by random chance.
-            - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`.
-            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
-                the confidence of the detection based on the unlikeliness of this `z-score` observation.
-            """
-            )
-
-        with gr.Accordion("A note on model capability",open=True):
-            gr.Markdown(
-            """
-            This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
-
-            Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
-            For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
-            Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
-            Longer prompts that end mid-sentence will result in more fluent generations.
-            """
-            )
-        gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
+        gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
 
         # Construct state for parameters, define updates and toggles
         default_prompt = args.__dict__.pop("default_prompt")
         session_args = gr.State(value=args)
 
-        with gr.Tab("
+        with gr.Tab("Welcome"):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    gr.Markdown(
+                    """
+                    Potential harms of large language models can be mitigated by *watermarking* a model's output.
+                    *Watermarks* are embedded signals in the generated text that are invisible to humans but algorithmically
+                    detectable, allowing *anyone* to later check whether a given span of text
+                    was likely to have been generated by a model that uses the watermark.
+
+                    This space showcases a watermarking approach that can be applied to _any_ generative language model.
+                    For demonstration purposes, the space serves a "small" multi-billion parameter model (see the following note for caveats due to its small size).
+                    """
+                    )
+                    with gr.Accordion("A note on model generation quality",open=False):
+                        gr.Markdown(
+                        """
+                        This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
+
+                        Importantly, we use a language model that is designed to "complete" your prompt, and not a model that is fine-tuned to follow instructions.
+                        For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
+                        Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                        Longer prompts that end mid-sentence will result in more fluent generations.
+                        """
+                        )
+                    gr.Markdown(
+                    """
+                    **[Generate & Detect]**: The first tab shows that the watermark can be embedded with
+                    negligible impact on text quality. You can try any prompt and compare the quality of
+                    normal text (*Output Without Watermark*) to the watermarked text (*Output With Watermark*) below it.
+                    Metrics on the right show that the watermark can be reliably detected.
+                    Detection is very efficient and does not use the language model or its parameters.
+
+                    **[Detector Only]**: You can also copy-paste the watermarked text (or any other text)
+                    into the second tab. This can be used to see how many sentences you could remove and still detect the watermark.
+                    You can also verify here that the detection has, by design, a low false-positive rate;
+                    this means that human-generated text that you copy into this detector will not be marked as machine-generated.
+
+                    You can find more details on how this watermark functions in our [ArXiv preprint](https://arxiv.org/abs/2301.10226).
+                    """
+                    )
+
+                with gr.Column(scale=1):
+                    gr.Markdown(
+                    """
+                    
+                    """
+                    )
+
+
+        with gr.Tab("Generate & Detect"):
 
             with gr.Row():
                 prompt = gr.Textbox(label=f"Prompt", interactive=True,lines=10,max_lines=10, value=default_prompt)

@@ -463,7 +477,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 with gr.Column(scale=1):
                     select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
 
-
+
+        with gr.Accordion("What do the settings do?",open=False):
             gr.Markdown(
             """
             #### Generation Parameters:

@@ -515,6 +530,27 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
             """
             )
 
+        with gr.Accordion("What do the output metrics mean?",open=False):
+            gr.Markdown(
+            """
+            - `z-score threshold` : The cutoff for the hypothesis test
+            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                The first token is omitted in the simple, single token seeding scheme since there is no way to generate
+                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+            - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
+                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked.
+            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                If this is extremely _small_ we are confident that this many green tokens were not chosen by random chance.
+            - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`.
+            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
+                the confidence of the detection based on the unlikeliness of this `z-score` observation.
+            """
+            )
+
         gr.HTML("""
                 <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
                 Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.

@@ -532,7 +568,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
         output_without_watermark.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
         output_with_watermark.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
         # Register main detection tab click
-        detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+        # detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+        detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args], api_name="detection")
 
         # State management logic
         # update callbacks that change the state dict

@@ -624,10 +661,15 @@ def main(args):
         model, tokenizer, device = load_model(args)
     else:
         model, tokenizer, device = None, None, None
+        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
+        if args.use_gpu:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            device = "cpu"
 
-
-
-
+
+    # terrapin example
+    input_text = (
     "The diamondback terrapin or simply terrapin (Malaclemys terrapin) is a "
     "species of turtle native to the brackish coastal tidal marshes of the "
     "Northeastern and southern United States, and in Bermuda.[6] It belongs "

@@ -648,9 +690,12 @@ def main(args):
     "or white. All have a unique pattern of wiggly, black markings or spots "
     "on their body and head. The diamondback terrapin has large webbed "
     "feet.[9] The species is"
-
+    )
 
-
+    args.default_prompt = input_text
+
+    # Generate and detect, report to stdout
+    if not args.skip_model_load:
 
         term_width = 80
         print("#"*term_width)
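The detection metrics described in the accordions above reduce to a one-proportion z-test on the count of green tokens. Below is a minimal, standalone sketch of that arithmetic (not the repo's `WatermarkDetector` class; the 4.0 threshold mirrors the demo's default `detection_z_threshold`, taken here as an assumption):

```python
# Standalone sketch of the detection arithmetic described above.
# Null hypothesis: without the watermark, each counted token lands in its
# greenlist with probability gamma, so the green count is ~ Binomial(T, gamma).
from math import sqrt, erfc

def watermark_z_score(green_count: int, T: int, gamma: float) -> float:
    """One-proportion z-statistic for observing `green_count` greens out of T tokens."""
    expected = gamma * T
    std = sqrt(T * gamma * (1 - gamma))
    return (green_count - expected) / std

def p_value(z: float) -> float:
    """One-sided p-value of the z-statistic under the standard normal null."""
    return 0.5 * erfc(z / sqrt(2))

# Example: 140 of T=200 counted tokens fell in their greenlists, with gamma=0.5.
z = watermark_z_score(140, 200, 0.5)  # ~5.66
p = p_value(z)                        # ~7.7e-09
prediction = z > 4.0                  # 4.0: assumed demo default z-score threshold
confidence = 1 - p if prediction else None
print(f"z={z:.2f} p={p:.2g} watermarked={prediction} confidence={confidence}")
```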
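Registering the click handler with `api_name="detection"` exposes the detector as a named endpoint on the Space, so it can be invoked outside the UI. A hedged sketch using `gradio_client` follows; the Space id is a placeholder, and the exact input/output signature depends on how `detect_partial` is wired in the deployed app:

```python
# Sketch: calling the newly named "detection" endpoint programmatically.
# Requires `pip install gradio_client`. Space id below is a placeholder.
from gradio_client import Client

client = Client("jwkirchenbauer/lm-watermarking")  # placeholder Space id
result = client.predict(
    "Paste a span of text here to score it for the watermark...",  # detection_input
    api_name="/detection",
)
print(result)  # detection metrics (z-score, p-value, prediction, ...)
```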