# parahase / app.py
# imseldrith's picture
# Update app.py
# c7337af
import gradio as gr
from bs4 import BeautifulSoup
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
import random
import nltk
# Fetch the NLTK data used by the paraphraser when the module is loaded.
# 'punkt' holds the tokenizer models for older NLTK releases; recent releases
# load them from 'punkt_tab' instead, so both are requested. 'wordnet' backs
# the synonym lookup.
for resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)
def paraphrase_text(text):
    """Return *text* with each alphabetic word swapped for a random synonym.

    The text is split with ``word_tokenize``. Every purely alphabetic token is
    replaced by a randomly chosen lemma gathered from all of its WordNet
    synsets; tokens that are not alphabetic, and words for which no different
    lemma exists, are kept unchanged.

    Args:
        text: Plain text to paraphrase.

    Returns:
        The resulting tokens joined by single spaces. The spacing of the
        input is not reproduced.
    """
    paraphrased_tokens = []
    for token in word_tokenize(text):
        paraphrased_word = token
        if token.isalpha():
            folded = token.casefold()
            synonyms = []
            for syn in wordnet.synsets(token):
                for lemma in syn.lemmas():
                    name = lemma.name()
                    # Compare case-insensitively so "Dog" is never "replaced"
                    # by the identical word "dog".
                    if name.casefold() != folded:
                        # Multi-word lemmas are spelled with underscores
                        # ("ice_cream"); emit them as ordinary words.
                        synonyms.append(name.replace('_', ' '))
            if synonyms:
                paraphrased_word = random.choice(synonyms)
        paraphrased_tokens.append(paraphrased_word)
    return ' '.join(paraphrased_tokens)
def paraphrase_html(html_text):
    """Paraphrase the prose inside an HTML string while keeping its markup.

    The HTML is parsed with BeautifulSoup's ``html.parser``; each ordinary
    text node is passed through ``paraphrase_text`` and replaced in place.
    The following are left untouched:

    * comments, doctypes, CDATA and similar preformatted nodes (replacing
      them with a plain string would turn them into visible page text);
    * the contents of ``<script>`` and ``<style>`` elements;
    * whitespace-only text nodes.

    Leading and trailing whitespace of every rewritten node is preserved so
    that words next to inline tags (``some <b>HTML</b> text``) do not fuse.

    Args:
        html_text: HTML markup to paraphrase.

    Returns:
        The serialized HTML with its text nodes paraphrased.
    """
    # Local import: the file only imports ``BeautifulSoup`` at module level.
    from bs4.element import PreformattedString

    non_prose_parents = ('script', 'style')

    soup = BeautifulSoup(html_text, 'html.parser')
    # ``string=True`` is the current spelling of the older ``text=True``.
    # find_all returns a list, so nodes can be replaced while looping over it.
    for node in soup.find_all(string=True):
        if isinstance(node, PreformattedString):
            continue
        parent = node.parent
        if parent is not None and parent.name in non_prose_parents:
            continue

        original = str(node)
        core = original.strip()
        if not core:
            continue

        # Re-attach the surrounding whitespace that tokenizing would drop.
        leading = original[:len(original) - len(original.lstrip())]
        trailing = original[len(original.rstrip()):]
        node.replace_with(leading + paraphrase_text(core) + trailing)

    return str(soup)
# Gradio UI: one textbox for the HTML input, rendered HTML for the result.
# Components are taken from the top-level ``gr`` namespace; the
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated and absent from
# newer Gradio releases.
inputs = gr.Textbox(label="Enter HTML text to paraphrase")
outputs = gr.HTML(label="Paraphrased HTML")
title = "HTML Paraphraser"
description = "Enter HTML text and get a paraphrased version in HTML format."
examples = [["<p>This is some <b>HTML</b> text to paraphrase.</p>"]]
demo = gr.Interface(
    fn=paraphrase_html,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
)
# Start the web app as soon as the module is executed.
demo.launch()