# NOTE(review): the three lines below were a Hugging Face Spaces status banner
# ("Spaces: / Sleeping / Sleeping") captured during export — scrape residue,
# not program text; preserved here as a comment so the file parses.
import os

import openai
import gradio as gr
import requests
from bs4 import BeautifulSoup

# Read the API key from the environment rather than hard-coding a secret in
# source control; the original placeholder string is kept as the fallback so
# behavior is unchanged when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your API - KEY")
# Function to fetch and crawl website content
def fetch_website_content(url):
    """Fetch *url* and return the concatenated text of its ``<p>`` tags.

    On any failure (non-200 status, network error, bad URL, parser error)
    returns a string beginning with ``"Error:"`` — callers use that prefix
    as an in-band error signal.
    """
    try:
        # Time-bound the request so a hung server cannot stall the UI forever.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return "Error: Could not fetch the webpage. Please check the URL."
        # Parse the website content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        # Paragraph text only; scripts, navigation, and markup are ignored.
        website_text = " ".join([p.text for p in soup.find_all('p')])
        return website_text
    except Exception as e:  # network errors, invalid URLs, parser failures
        return f"Error: {str(e)}"
# Function to split content into chunks that fit within the token limits
def split_content_into_chunks(content, max_chunk_size=3000):
    """Split *content* into whitespace-delimited chunks.

    Each chunk holds at most *max_chunk_size* words (a rough proxy for the
    model's token limit). Returns an empty list for empty/whitespace input.
    """
    words = content.split()
    return [
        " ".join(words[start:start + max_chunk_size])
        for start in range(0, len(words), max_chunk_size)
    ]
# Function to query GPT model with website content
def ask_question(url, question):
    """Answer *question* from the textual content of *url*.

    Fetches the page, splits it into word-bounded chunks, queries the chat
    model once per chunk, and concatenates the per-chunk answers separated
    by blank lines. Returns an ``"Error: ..."`` string on any failure.
    """
    website_text = fetch_website_content(url)
    # Check the fetcher's in-band error PREFIX, not a substring match:
    # a page whose body merely contains the word "Error" is not a failure.
    if website_text.startswith("Error"):
        return website_text
    # Split content into manageable chunks based on OpenAI's token limit.
    chunks = split_content_into_chunks(website_text)
    # Collect per-chunk answers and join once (avoids quadratic string +=).
    answers = []
    for chunk in chunks:
        messages = [
            {"role": "system", "content": "You are a helpful assistant who answers questions based on the following website content."},
            {"role": "user", "content": f"Website content: {chunk}\n\nQuestion: {question}"}
        ]
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",  # Use gpt-4 if you have access to it
                messages=messages,
                max_tokens=3000,
                temperature=0.5,
            )
            answers.append(response.choices[0].message['content'].strip())
        except Exception as e:
            return f"Error: {str(e)}"
    # Matches the original output shape: each answer followed by "\n\n".
    return "".join(answer + "\n\n" for answer in answers)
# Gradio interface for chatbot
def chatbot(url, question):
    """Adapter between the Gradio UI and the question-answering pipeline."""
    answer = ask_question(url, question)
    return answer
# Build the Gradio UI: two text inputs (URL + question), one text output.
url_box = gr.Textbox(label="Website URL", placeholder="Enter website URL here...")
question_box = gr.Textbox(label="Your Question", placeholder="Ask a question to understand what is in the website or generate article based on the website information...")
answer_box = gr.Textbox(label="Responses")

iface = gr.Interface(
    fn=chatbot,
    inputs=[url_box, question_box],
    outputs=answer_box,
    title="Contentigo - Lite",
    description="Ask questions about the content of any website. Also, generate articles based on the website content.",
)

# Launch the Gradio interface (blocks serving the app at import time, as the
# original did — Spaces run the module top level directly).
iface.launch()