ContentigoLite / app.py
suhail0318's picture
Update app.py
c4507ed verified
import os

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup
# Configure the OpenAI API key.
# SECURITY: read the key from the OPENAI_API_KEY environment variable instead
# of hardcoding it in source (the original literal is only a placeholder and
# will never authenticate; it is kept as the fallback for compatibility).
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your API - KEY")
def fetch_website_content(url):
    """Fetch a web page and return the concatenated text of its <p> tags.

    Args:
        url: The URL to fetch.

    Returns:
        The joined paragraph text on success, or a string starting with
        "Error" describing the failure (this function never raises).
    """
    try:
        # FIX: the original call had no timeout, so a dead/unreachable host
        # would hang the whole web UI indefinitely.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return "Error: Could not fetch the webpage. Please check the URL."
        # Parse the website content with BeautifulSoup.
        soup = BeautifulSoup(response.content, 'html.parser')
        # Only paragraph text is extracted; headings/lists are ignored by design.
        website_text = " ".join([p.text for p in soup.find_all('p')])
        return website_text
    except Exception as e:
        # Best-effort contract: report any failure (bad URL, DNS, timeout,
        # parse error) as an "Error: ..." string rather than raising.
        return f"Error: {str(e)}"
def split_content_into_chunks(content, max_chunk_size=3000):
    """Split *content* into whitespace-delimited chunks.

    Each chunk holds at most *max_chunk_size* words — a rough, word-based
    proxy for the model's token limit.

    Args:
        content: The full text to split.
        max_chunk_size: Maximum number of words per chunk.

    Returns:
        A list of chunk strings (empty list for empty/whitespace input).
    """
    words = content.split()
    return [
        " ".join(words[start:start + max_chunk_size])
        for start in range(0, len(words), max_chunk_size)
    ]
def ask_question(url, question):
    """Answer *question* based on the text content of *url* using GPT.

    The page text is split into word-count chunks and each chunk is sent to
    the model separately; the per-chunk answers are concatenated.

    Args:
        url: Website to crawl for context.
        question: The user's question about that website.

    Returns:
        The combined model answer, or a string starting with "Error" on
        failure (fetch or API error).
    """
    # Fetch website content.
    website_text = fetch_website_content(url)
    # FIX: the original check was `"Error" in website_text`, which falsely
    # rejected any page whose *content* merely contained the word "Error".
    # The fetch helper always prefixes real failures with "Error".
    if website_text.startswith("Error"):
        return website_text
    # Split content into manageable chunks based on OpenAI's token limit.
    chunks = split_content_into_chunks(website_text)
    # Accumulate the per-chunk answers into a single response.
    full_answer = ""
    for chunk in chunks:
        # Prepare the prompt for GPT.
        messages = [
            {"role": "system", "content": "You are a helpful assistant who answers questions based on the following website content."},
            {"role": "user", "content": f"Website content: {chunk}\n\nQuestion: {question}"}
        ]
        # Use GPT-3.5-turbo model to generate an answer.
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",  # Use gpt-4 if you have access to it
                messages=messages,
                max_tokens=3000,  # Upper bound on the generated answer length
                temperature=0.5,
            )
            answer = response.choices[0].message['content'].strip()
            full_answer += answer + "\n\n"  # Append chunked responses together
        except Exception as e:
            # Abort on the first API failure and surface it to the UI.
            return f"Error: {str(e)}"
    return full_answer
def chatbot(url, question):
    """Gradio callback: delegate straight to ask_question."""
    answer = ask_question(url, question)
    return answer
# Build the Gradio UI with the new-style component API.
url_box = gr.Textbox(label="Website URL", placeholder="Enter website URL here...")
question_box = gr.Textbox(
    label="Your Question",
    placeholder="Ask a question to understand what is in the website or generate article based on the website information...",
)
answer_box = gr.Textbox(label="Responses")

iface = gr.Interface(
    fn=chatbot,
    inputs=[url_box, question_box],
    outputs=answer_box,
    title="Contentigo - Lite",
    description="Ask questions about the content of any website. Also, generate articles based on the website content.",
)

# Launch the Gradio interface.
iface.launch()