Spaces:

imseldrith
/

parahase

Sleeping

File size: 2,147 Bytes

7f15dc6
 
 
 
 
ff59b44
2eba18a
c7337af
7f15dc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0118db1
7f15dc6
 
 
 
 
 
 
 
 
 
 
4e6f4f7
7f15dc6
 
4e6f4f7
7f15dc6
 
 
4e6f4f7
7f15dc6

import gradio as gr
from bs4 import BeautifulSoup
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
import random
import nltk
# Fetch the NLTK data used below when the module is loaded.
# `word_tokenize` relies on the Punkt sentence models: recent NLTK releases
# load them from the 'punkt_tab' package, older ones from the pickled 'punkt'
# package, so both are requested. 'wordnet' backs the synonym lookup.
for _resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(_resource)
def paraphrase_text(text):
    """Return *text* with each alphabetic word swapped for a random synonym.

    The text is split with NLTK's ``word_tokenize``. Every purely alphabetic
    token that has at least one WordNet lemma different from itself is
    replaced by one of those lemmas, picked with ``random.choice``. All other
    tokens (punctuation, numbers, words without synonyms) are kept unchanged.

    The tokens are joined with single spaces, so the original spacing is not
    preserved and punctuation ends up separated from the preceding word.

    Args:
        text: Plain text to paraphrase.

    Returns:
        The paraphrased tokens joined by single spaces; an empty string when
        the tokenizer yields no tokens.
    """
    paraphrased_tokens = []
    for token in word_tokenize(text):
        # Punctuation, numbers and mixed tokens pass through untouched.
        if not token.isalpha():
            paraphrased_tokens.append(token)
            continue

        lowered = token.lower()
        # Compare case-insensitively so "Text" is not "replaced" by "text",
        # and turn WordNet's underscore-joined multi-word lemmas
        # (e.g. "ice_cream") back into ordinary spaced phrases.
        # Duplicates are kept on purpose: a lemma shared by several synsets
        # stays proportionally more likely to be chosen, as before.
        synonyms = [
            lemma.name().replace('_', ' ')
            for synset in wordnet.synsets(token)
            for lemma in synset.lemmas()
            if lemma.name().lower() != lowered
        ]
        paraphrased_tokens.append(random.choice(synonyms) if synonyms else token)

    return ' '.join(paraphrased_tokens)

def paraphrase_html(html_text):
    """Paraphrase the human-readable text of an HTML snippet, keeping its markup.

    The snippet is parsed with BeautifulSoup's ``html.parser``. Each ordinary
    text node is passed through ``paraphrase_text`` and replaced in the tree;
    tags and attributes are left as parsed.

    The following nodes are deliberately left untouched:
      * comments, doctypes, CDATA sections and other special strings, which
        would otherwise be turned into visible text;
      * the contents of ``<script>`` and ``<style>`` elements, which are code,
        not prose;
      * whitespace-only nodes, which separate neighbouring elements.

    Leading and trailing whitespace of every paraphrased node is restored so
    that words on either side of an inline tag do not run together.

    Args:
        html_text: HTML markup as a string. ``None`` or an empty string
            yields an empty result.

    Returns:
        The serialized HTML with its text nodes paraphrased.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block; bs4 is already a dependency of this module.
    from bs4.element import PreformattedString

    if not html_text:
        return ''

    soup = BeautifulSoup(html_text, 'html.parser')

    # find_all returns a list snapshot, so replacing nodes while looping is
    # safe. `string=True` is the current name of the deprecated `text=True`.
    for node in soup.find_all(string=True):
        # Comment, Doctype, CData, ... all derive from PreformattedString.
        if isinstance(node, PreformattedString):
            continue
        parent = node.parent
        if parent is not None and parent.name in ('script', 'style'):
            continue

        original = str(node)
        core = original.strip()
        if not core:
            continue

        # Keep the surrounding whitespace exactly as it was in the source.
        leading = original[:len(original) - len(original.lstrip())]
        trailing = original[len(original.rstrip()):]
        node.replace_with(leading + paraphrase_text(core) + trailing)

    return str(soup)

# Build and start the Gradio web UI: a textbox for the HTML input and an HTML
# component that renders the paraphrased result.
# Components are taken from the top-level namespace (`gr.Textbox`, `gr.HTML`);
# the legacy `gr.inputs` / `gr.outputs` modules were deprecated in Gradio 3
# and are no longer available in Gradio 4.
inputs = gr.Textbox(label="Enter HTML text to paraphrase")
outputs = gr.HTML(label="Paraphrased HTML")

title = "HTML Paraphraser"
description = "Enter HTML text and get a paraphrased version in HTML format."
# Each inner list is one example row; it holds one value per input component.
examples = [["<p>This is some <b>HTML</b> text to paraphrase.</p>"]]

gr.Interface(
    paraphrase_html,
    inputs,
    outputs,
    title=title,
    description=description,
    examples=examples,
).launch()