Spaces:
Build error
chore: fix chatgpt step
app.py CHANGED
@@ -132,7 +132,7 @@ def encrypt_query_fn(query):
 
         encrypted_tokens.append(encrypted_x)
 
-    print(
+    print("Data encrypted ✅ on Client Side")
 
     assert len({len(token) for token in encrypted_tokens}) == 1
 
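The print added here sits next to an invariant check: every serialized ciphertext must have the same byte length, presumably so the concatenated payload can later be split back into equal per-token chunks. A minimal, self-contained sketch of that check (the byte strings are stand-ins for real Concrete ML ciphertexts):

# Stand-ins for serialized ciphertexts produced on the client side
encrypted_tokens = [b"\x00" * 64, b"\x01" * 64, b"\x02" * 64]

print("Data encrypted ✅ on Client Side")

# All serialized tokens must share a single length; a length-set of size 1 proves it
assert len({len(token) for token in encrypted_tokens}) == 1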
@@ -355,7 +355,7 @@ def decrypt_fn(text) -> Dict:
     else:
         identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
 
-    print(
+    print("Decryption done ✅ on Client Side")
 
     return anonymized_text, identified_df
 
@@ -380,25 +380,20 @@ def anonymization_with_fn(query):
 
 def query_chatgpt_fn(anonymized_query, anonymized_document):
 
-
-
+    print("------------ Step 5: ChatGPT communication")
+
+    if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
         error_message = "Error ❌: Please generate the key first!"
-        return {
+        return {chatgpt_response_anonymized: gr.update(value=error_message)}
 
-
-    if not encryted_query_path.is_file():
+    if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
         error_message = "Error ❌: Please encrypt your query first!"
-        return {
-
-    decrypted_query_path = KEYS_DIR / "reconstructed_sentence"
-    if not decrypted_query_path.is_file():
-        error_message = "Error ❌: Please run the FHE computation first!"
-        return {anonymized_text_output: gr.update(value=error_message)}
+        return {chatgpt_response_anonymized: gr.update(value=error_message)}
 
     prompt = read_txt(PROMPT_PATH)
 
     # Prepare prompt
-
+    initial_prompt = prompt + "\n"
     query = (
         "Document content:\n```\n"
         + anonymized_document
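The early returns above use the Gradio pattern of handing back a dict that maps one output component to gr.update(...), so only that component changes. A minimal runnable sketch of the same pattern; the component names are illustrative, not the ones defined in app.py:

import gradio as gr

def run_step(key_ready):
    # Early exit: only the status textbox is updated with an error message
    if not key_ready:
        return {status_box: gr.update(value="Error ❌: Please generate the key first!")}
    return {status_box: gr.update(value="Key found ✅, running the step...")}

with gr.Blocks() as demo:
    key_ready = gr.Checkbox(label="Evaluation key generated?")
    status_box = gr.Textbox(label="Status")
    gr.Button("Run").click(run_step, inputs=[key_ready], outputs=[status_box])

if __name__ == "__main__":
    demo.launch()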
@@ -407,7 +402,7 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
         + anonymized_query
         + "\n```"
     )
-    print(
+    print(f'initial_prompt:\n{initial_prompt}')
 
     completion = client.chat.completions.create(
         model="gpt-4-1106-preview",  # Replace with "gpt-4" if available
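The completion call goes through the openai v1 client. The exact roles and prompt text are not visible in this diff, so the following is only a sketch of what such a request looks like, with placeholder prompt strings:

from openai import OpenAI

client = OpenAI()  # expects OPENAI_API_KEY in the environment

initial_prompt = "Answer questions about the document; treat hexadecimal placeholders as opaque names.\n"
query = (
    "Document content:\n```\nMy name is a1b2c3 and I live in d4e5f6.\n```\n"
    "Query:\n```\nWhere does a1b2c3 live?\n```"
)

completion = client.chat.completions.create(
    model="gpt-4-1106-preview",  # Replace with "gpt-4" if available
    messages=[
        {"role": "system", "content": initial_prompt},
        {"role": "user", "content": query},
    ],
)
anonymized_response = completion.choices[0].message.content
print(anonymized_response)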
@@ -438,7 +433,9 @@ def query_chatgpt_fn(anonymized_query, anonymized_document):
         else:
             processed_tokens.append(token)
     deanonymized_response = "".join(processed_tokens)
-
+
+    return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
+            chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
 
 
 demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
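The tokens of ChatGPT's answer are rebuilt into a de-anonymized response before the final return added in this hunk. The placeholder-to-word mapping is not part of the diff, so the sketch below assumes a hypothetical uuid_map built client-side during anonymization:

import re

# Hypothetical mapping from hexadecimal placeholders back to the original PII
# (kept on the client side only; never sent to ChatGPT)
uuid_map = {"a1b2c3": "David Johnson", "d4e5f6": "Maine"}

anonymized_response = "a1b2c3 lives in d4e5f6."

processed_tokens = []
for token in re.split(r"(\W+)", anonymized_response):
    if token in uuid_map:
        processed_tokens.append(uuid_map[token])  # restore the original word
    else:
        processed_tokens.append(token)            # keep ordinary text unchanged
deanonymized_response = "".join(processed_tokens)

print(deanonymized_response)  # -> David Johnson lives in Maine.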
@@ -473,8 +470,7 @@ with demo:
 
     with gr.Accordion("What is encrypted anonymization?", open=False):
         gr.Markdown(
-
-            Anonymization is the process of removing personally identifiable information (PII)
+            """Anonymization is the process of removing personally identifiable information (PII)
             from data to protect individual privacy.
 
             To resolve trust issues when deploying anonymization as a cloud service, Fully Homomorphic
@@ -507,13 +503,16 @@ with demo:
 
     ########################## Main document Part ##########################
 
+    gr.Markdown("<hr />")
     gr.Markdown("## Step 2: Private document")
 
     with gr.Row():
         with gr.Column():
             gr.Markdown("**Original document:**")
             gr.Markdown(
-                """This document was retrieved from the
+                """This document was retrieved from the
+                [Microsoft Presidio](https://huggingface.co/spaces/presidio/presidio_demo) demo.
+
                 You can select and deselect sentences to customize the document that will be used
                 as the initial prompt for ChatGPT in step 5.
                 """
@@ -522,7 +521,7 @@ with demo:
             gr.Markdown("**Anonymized document:**")
             gr.Markdown(
                 """You can see below the anonymized text, replaced with hexademical strings, that
-                will be sent to ChatGPT.
+                will be sent to ChatGPT.
 
                 ChatGPT will then be able to answer any queries about the document.
                 """
@@ -554,7 +553,8 @@ with demo:
 
         gr.Markdown(
             """Now, you can formulate a query. Please choose from the predefined options in
-
+            <span style='color:grey'>“Queries examples”</span>" or craft a custom question in
+            the <span style='color:grey'>“Customized query”</span>" text box.
 
             Remain concise and relevant to the context. Any off-topic query will not be processed.
             """