Spaces:

suhail0318
/

ContentigoLite

Sleeping

File size: 3,315 Bytes

2e0e1df
a3e2fc7
2e0e1df
 
a3e2fc7
2e0e1df
c4507ed
a3e2fc7
2e0e1df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3e2fc7
2e0e1df
 
 
 
 
 
 
 
 
 
 
 
a3e2fc7
2e0e1df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3e2fc7
2e0e1df
 
 
a3e2fc7
2e0e1df
 
 
 
 
 
a3e2fc7
1766749
2e0e1df
 
a3e2fc7
 
2e0e1df

import os

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup

# Initialize OpenAI with your API key.
# The key is read from the OPENAI_API_KEY environment variable so the real
# secret does not have to be written into this file; the original placeholder
# is kept as the fallback so behavior is unchanged when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your API - KEY")

# Function to fetch and crawl website content
def fetch_website_content(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every paragraph tag joined by single spaces, or a
        message beginning with ``"Error:"`` when the page could not be
        fetched or parsed. Failures are reported through the return value
        rather than raised.
    """
    try:
        # requests does not time out by default, so an unresponsive server
        # would otherwise block this call indefinitely.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return "Error: Could not fetch the webpage. Please check the URL."

        # Parse the website content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract text content from paragraph tags
        return " ".join(p.text for p in soup.find_all('p'))

    except Exception as e:
        # Deliberately broad: the caller expects an "Error:" string instead
        # of an exception for any failure (bad URL, network, parsing).
        return f"Error: {str(e)}"

# Function to split content into chunks that fit within the token limits
def split_content_into_chunks(content, max_chunk_size=3000):
    """Split text into chunks of at most ``max_chunk_size`` words.

    Words are whitespace-delimited and re-joined with single spaces, so the
    original spacing and line breaks are not preserved. The size is counted
    in words, which only approximates a model's token count.

    Args:
        content: The text to split.
        max_chunk_size: Maximum number of words per chunk; must be >= 1.

    Returns:
        A list of chunk strings in original order; empty when ``content``
        contains no words.

    Raises:
        ValueError: If ``max_chunk_size`` is less than 1. A non-positive
            size can never consume the input and would loop forever.
    """
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be at least 1")

    words = content.split()
    # Step through the word list by index instead of repeatedly re-slicing
    # the remaining tail, which copies the rest of the list on every pass.
    return [
        " ".join(words[start:start + max_chunk_size])
        for start in range(0, len(words), max_chunk_size)
    ]

# Function to query GPT model with website content
def ask_question(url, question):
    """Answer a question about a web page using the OpenAI chat model.

    The page text is fetched and split into chunks; each chunk is sent to
    the model together with the question, and the per-chunk answers are
    concatenated in order.

    Args:
        url: Address of the page to read.
        question: The question to ask about the page content.

    Returns:
        The per-chunk answers, each followed by a blank line, or a message
        beginning with ``"Error:"`` when the page could not be fetched, had
        no paragraph text, or a model call failed.
    """
    # Fetch website content
    website_text = fetch_website_content(url)

    # fetch_website_content reports failures with an "Error:" prefix. Test
    # the prefix rather than searching the whole text, so that pages which
    # merely mention the word "Error" are still processed.
    if website_text.startswith("Error:"):
        return website_text

    # Split content into manageable chunks based on OpenAI's token limit
    chunks = split_content_into_chunks(website_text)
    if not chunks:
        # Nothing to send to the model; say so instead of returning "".
        return "Error: No paragraph text was found on the webpage."

    answers = []

    # Query GPT model for each chunk
    for chunk in chunks:
        messages = [
            {"role": "system", "content": "You are a helpful assistant who answers questions based on the following website content."},
            {"role": "user", "content": f"Website content: {chunk}\n\nQuestion: {question}"}
        ]

        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",  # Use gpt-4 if you have access to it
                messages=messages,
                max_tokens=3000,  # Upper bound on the length of each reply
                temperature=0.5,
            )
            answers.append(response.choices[0].message['content'].strip())
        except Exception as e:
            # Any failure aborts the run and is surfaced to the user as text;
            # answers gathered from earlier chunks are discarded.
            return f"Error: {str(e)}"

    # Each answer is followed by a blank line, including the last one.
    return "".join(f"{answer}\n\n" for answer in answers)

# Callback wired into the Gradio interface
def chatbot(url, question):
    """Return the answer to ``question`` about the page at ``url``.

    Thin adapter that forwards both arguments to ``ask_question`` and hands
    its result back unchanged.
    """
    reply = ask_question(url, question)
    return reply

# Build the UI components first, then assemble them into the interface.
url_input = gr.Textbox(
    label="Website URL",
    placeholder="Enter website URL here...",
)
question_input = gr.Textbox(
    label="Your Question",
    placeholder="Ask a question to understand what is in the website or generate article based on the website information...",
)
answer_output = gr.Textbox(label="Responses")

# Two text inputs (URL, question) feed `chatbot`; its return value is shown
# in the single output box.
iface = gr.Interface(
    fn=chatbot,
    inputs=[url_input, question_input],
    outputs=answer_output,
    title="Contentigo - Lite",
    description="Ask questions about the content of any website. Also, generate articles based on the website content.",
)

# Start serving the interface
iface.launch()