File size: 3,315 Bytes
2e0e1df
a3e2fc7
2e0e1df
 
a3e2fc7
2e0e1df
c4507ed
a3e2fc7
2e0e1df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3e2fc7
2e0e1df
 
 
 
 
 
 
 
 
 
 
 
a3e2fc7
2e0e1df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3e2fc7
2e0e1df
 
 
a3e2fc7
2e0e1df
 
 
 
 
 
a3e2fc7
1766749
2e0e1df
 
a3e2fc7
 
2e0e1df
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup

# Initialize OpenAI with your API key.
# The key is read from the OPENAI_API_KEY environment variable so a real
# secret never has to be written into this file; when the variable is unset
# the original placeholder is used, so existing behavior is preserved.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your API - KEY")

# Function to fetch and crawl website content
def fetch_website_content(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The text of every ``<p>`` tag joined with single spaces, or a string
        starting with ``"Error:"`` when the response status is not 200 or
        any exception is raised while fetching or parsing.
    """
    try:
        # Send a GET request to the website, bounded by the timeout
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return "Error: Could not fetch the webpage. Please check the URL."

        # Parse the website content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract text content from paragraph tags
        return " ".join(p.text for p in soup.find_all('p'))

    except Exception as e:
        # Deliberately broad: failures are reported as an "Error: ..." string
        # rather than raised, which is what ask_question checks for.
        return f"Error: {str(e)}"

# Function to split content into word-count-bounded chunks
def split_content_into_chunks(content, max_chunk_size=3000):
    """Split text into chunks of at most ``max_chunk_size`` words each.

    The size is measured in whitespace-separated words, not model tokens.
    Words inside each chunk are re-joined with single spaces, so the
    original whitespace is normalized.

    Args:
        content: Text to split.
        max_chunk_size: Maximum number of words per chunk; must be positive.

    Returns:
        A list of chunk strings in original order. Empty or whitespace-only
        content yields an empty list.

    Raises:
        ValueError: If ``max_chunk_size`` is zero or negative. Such a value
            would otherwise never consume any words and loop forever.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    words = content.split()
    # Stepped slicing visits each word once instead of re-copying the
    # remaining tail of the list on every iteration.
    return [
        " ".join(words[start:start + max_chunk_size])
        for start in range(0, len(words), max_chunk_size)
    ]

# Function to query GPT model with website content
def ask_question(url, question):
    """Answer a question about a web page by querying the chat model per chunk.

    The page text is fetched, split into chunks, and each chunk is sent to
    the model together with the question. The per-chunk answers are
    concatenated in order, each followed by a blank line.

    Args:
        url: Address of the page whose content should be used.
        question: The user's question or instruction.

    Returns:
        The concatenated answers, or a string starting with ``"Error:"``
        when fetching fails, the page has no paragraph text, or a model
        request raises.
    """
    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
    # confirm the installed openai version still provides it.

    # Fetch website content
    website_text = fetch_website_content(url)

    # fetch_website_content reports failures as strings beginning with
    # "Error:". Check the prefix rather than a substring, so that pages
    # which merely mention the word "Error" are still sent to the model.
    if website_text.startswith("Error:"):
        return website_text

    # Split content into manageable chunks
    chunks = split_content_into_chunks(website_text)
    if not chunks:
        # No <p> text was extracted; say so instead of returning a blank answer.
        return "Error: No paragraph text was found on the webpage."

    system_message = {
        "role": "system",
        "content": "You are a helpful assistant who answers questions based on the following website content.",
    }

    answers = []

    # Query GPT model for each chunk
    for chunk in chunks:
        messages = [
            system_message,
            {"role": "user", "content": f"Website content: {chunk}\n\nQuestion: {question}"},
        ]

        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",  # Use gpt-4 if you have access to it
                messages=messages,
                max_tokens=3000,
                temperature=0.5,
            )
            answers.append(response.choices[0].message['content'].strip())
        except Exception as e:
            # A failure on any chunk aborts the whole request with the error text.
            return f"Error: {str(e)}"

    # Each chunk's answer is followed by a blank line
    return "".join(f"{answer}\n\n" for answer in answers)

# Callback wired into the Gradio interface
def chatbot(url, question):
    """Return the answer produced by ``ask_question`` for the given URL and question."""
    reply = ask_question(url, question)
    return reply

# Input widgets: the page address and the user's question
_url_box = gr.Textbox(label="Website URL", placeholder="Enter website URL here...")
_question_box = gr.Textbox(
    label="Your Question",
    placeholder="Ask a question to understand what is in the website or generate article based on the website information...",
)

# Assemble the Gradio interface around the chatbot callback
iface = gr.Interface(
    fn=chatbot,
    inputs=[_url_box, _question_box],
    outputs=gr.Textbox(label="Responses"),
    title="Contentigo - Lite",
    description="Ask questions about the content of any website. Also, generate articles based on the website content.",
)

# Start serving the interface
iface.launch()