import os
import re
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
import gradio as gr

def _select_image(imgs):
    """Return ``(src, alt)`` of the first usable ``<img>`` in ``imgs``, else ``(None, "")``."""
    for img in imgs:
        src = img.get('src', '')
        if not src:
            continue
        alt = img.get('alt', '') or ''
        # Images served from Wikimedia-style paths are accepted regardless of alt.
        looks_hosted = any(marker in src for marker in ("upload", "commons", "wikipedia"))
        if looks_hosted or alt.lower() not in ("logo", "icon"):
            return src, alt
    return None, ""


def _alt_text_from_url(img_url):
    """Build a readable alt text from the file name in ``img_url`` ("Image" if empty)."""
    from urllib.parse import unquote, urlparse

    # Only look at the URL path so a query string or fragment never ends up
    # in the generated text.
    fname = unquote(urlparse(img_url).path.rsplit('/', 1)[-1])
    fname = re.sub(r'^\d+px-', '', fname)          # leading "<N>px-" thumbnail prefix
    fname = re.sub(r'\.[A-Za-z0-9]+$', '', fname)  # trailing file extension
    return fname.replace('_', ' ').strip() or "Image"


def fetch_article_content(url):
    """Download an HTML article and extract its title, body text and lead image.

    Args:
        url: Address of the page to fetch.

    Returns:
        A tuple ``(title, text_content, (img_url, img_alt))``.

        * On any request failure, a non-200 status, or a non-HTML
          Content-Type the result is ``(None, None, (None, None))``.
        * Otherwise ``title`` is the text of the first ``<h1>`` (or
          ``<title>``), falling back to ``"Untitled"``; ``text_content`` is
          the non-empty ``<p>`` texts joined by blank lines (it may be an
          empty string); ``img_url`` is an absolute URL or ``None`` and
          ``img_alt`` is ``""`` when no image was found.
    """
    from urllib.parse import urljoin

    failure = (None, None, (None, None))
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }

    try:
        resp = requests.get(url, headers=headers, timeout=10)
    except (requests.RequestException, ValueError):
        # Network errors, timeouts and malformed URLs are all reported as
        # "nothing fetched"; the caller turns that into a user-facing message.
        return failure

    content_type = resp.headers.get('Content-Type', '').lower()
    if resp.status_code != 200 or 'text/html' not in content_type:
        return failure

    # Parse the raw bytes. Only trust the HTTP-level encoding when the server
    # actually declared a charset; otherwise let BeautifulSoup detect it from
    # the document instead of relying on requests' text/* fallback encoding.
    declared_encoding = resp.encoding if 'charset' in content_type else None
    soup = BeautifulSoup(resp.content, 'html.parser', from_encoding=declared_encoding)
    for tag in soup(['script', 'style', 'header', 'footer', 'noscript', 'form', 'nav', 'aside']):
        tag.decompose()

    title_tag = soup.find('h1') or soup.title
    title = title_tag.get_text().strip() if title_tag else ""
    wiki_suffix = " - Wikipedia"
    if title.endswith(wiki_suffix):
        # Strip the suffix only, never an occurrence in the middle of the title.
        title = title[:-len(wiki_suffix)].rstrip()
    title = title or "Untitled"

    content_div = soup.find('div', {'class': 'mw-parser-output'}) or soup.body
    if content_div:
        # Drop footnote markers and reference lists before collecting text.
        for ref in content_div.find_all('sup', {'class': 'reference'}):
            ref.decompose()
        for ref_list in content_div.find_all(['ol', 'ul'], {'class': 'references'}):
            ref_list.decompose()
        paragraph_texts = (p.get_text().strip() for p in content_div.find_all('p'))
        text_content = "\n\n".join(text for text in paragraph_texts if text)
    else:
        text_content = soup.get_text(separator="\n")

    text_content = text_content.strip()

    img_scope = content_div if content_div else soup
    img_url, img_alt = _select_image(img_scope.find_all('img'))

    if img_url:
        # urljoin handles protocol-relative ("//host/..."), root-relative
        # ("/path") and document-relative ("img/a.png") sources alike, and
        # leaves absolute URLs untouched. Resolve against the final URL so
        # redirects do not break relative paths.
        img_url = urljoin(resp.url or url, img_url)
        if not img_alt:
            img_alt = _alt_text_from_url(img_url)

    return title, text_content, (img_url, img_alt)

def generate_post(platform, title, content, model):
    """Ask ``model`` to rewrite an article as an HTML post body for ``platform``.

    Args:
        platform: Target platform name; ``"reddit"`` and ``"quora"``
            (case-insensitive) select dedicated tone instructions, anything
            else gets a generic tone.
        title: Article title, included in the prompt for context.
        content: Plain-text article body to transform.
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.

    Returns:
        The generated HTML with surrounding whitespace (and a wrapping
        Markdown code fence, if the model added one) removed. If generation
        or reading the response fails, an HTML ``<p><em>Error: ...</em></p>``
        string is returned instead of raising.
    """
    from html import escape

    platform = platform.lower()

    if platform == "reddit":
        style_instructions = (
            "an informal, conversational tone, as if posting on Reddit. "
            "Format the response using HTML tags for paragraphs and lists, "
            "but do not wrap it in triple backticks or ```html code blocks."
        )
    elif platform == "quora":
        style_instructions = (
            "a clear, detailed explanatory tone, as if answering on Quora. "
            "Use proper HTML for readability, without wrapping in code blocks."
        )
    else:
        style_instructions = "a clear and accessible tone"

    prompt = (
        f"Transform the following article content into {style_instructions}.\n"
        f"Output the result in valid HTML format with proper paragraphs (and lists if needed).\n"
        f"Do NOT include the title or image — only the body content in HTML.\n\n"
        f"Article Title: {title}\n"
        f"Article Content:\n\"\"\"\n{content}\n\"\"\""
    )

    try:
        response = model.generate_content(prompt)
        # Reading ``.text`` stays inside the try: the accessor itself can
        # raise when the response carries no usable text (see the library's
        # response documentation), and that must not escape to the caller.
        text = response.text.strip()
    except Exception as e:
        # The message ends up inside rendered HTML, so escape the exception text.
        return f"<p><em>Error: failed to generate {platform} content ({escape(str(e))})</em></p>"

    # Models sometimes wrap the answer in a Markdown fence despite the prompt;
    # unwrap it so the backticks are not rendered literally.
    fenced = re.match(r"^```[A-Za-z0-9]*[ \t]*\r?\n(.*?)\r?\n?```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1).strip()

    return text

def process_url(url, api_key):
    """Fetch an article and build Reddit- and Quora-style HTML posts from it.

    Args:
        url: Address of the article to convert.
        api_key: Gemini API key used to configure ``genai``.

    Returns:
        A tuple ``(reddit_html, quora_html)``. When the API key or URL is
        missing, the API cannot be configured, or no article text could be
        retrieved, both elements are the same HTML error paragraph.
    """
    from html import escape

    def _both(message):
        # Both output panels show the same error message.
        return message, message

    api_key = (api_key or "").strip()
    if not api_key:
        return _both("<p><em>API key is required.</em></p>")

    url = (url or "").strip()
    if not url:
        # Fail fast instead of configuring the API and attempting a fetch.
        return _both("<p><em>Article URL is required.</em></p>")

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-pro-latest')
    except Exception as e:
        return _both(f"<p><em>Failed to configure Gemini API: {escape(str(e))}</em></p>")

    # The URL is user input and title/image data are scraped from a remote
    # page; escape all of them before interpolating into HTML.
    safe_url = escape(url, quote=True)

    title, content, (img_url, img_alt) = fetch_article_content(url)
    if not content:
        return _both(
            f"<p><em>Could not retrieve content from this URL: "
            f"<a href='{safe_url}'>{safe_url}</a></em></p>"
        )

    reddit_body = generate_post("reddit", title, content, model)
    quora_body = generate_post("quora", title, content, model)

    source_html = (
        f'<p><small><em>Source: <a href="{safe_url}" target="_blank" '
        f'rel="noopener noreferrer">{safe_url}</a></em></small></p>'
    )

    # Title and image are identical for both posts, so build them once.
    safe_title = escape(title or "Untitled")
    header_html = f"<h2>{safe_title}</h2>\n"
    if img_url:
        safe_src = escape(img_url, quote=True)
        safe_alt = escape(img_alt or "", quote=True)
        header_html += (
            f'<img src="{safe_src}" alt="{safe_alt}" '
            f'style="max-width:100%; height:auto;" />\n'
        )

    reddit_html = header_html + reddit_body + source_html
    quora_html = header_html + quora_body + source_html

    return reddit_html, quora_html

# Gradio UI wiring: two text inputs (article URL, API key) are passed to
# process_url, whose two return values are rendered as HTML panels.
_url_box = gr.Textbox(
    label="Article URL",
    placeholder="https://en.wikipedia.org/wiki/Kefir",
)
_api_key_box = gr.Textbox(
    label="Gemini API Key",
    placeholder="Paste your Gemini API key here",
    type="password",  # the textbox is created with the "password" type
)
_reddit_panel = gr.HTML(label="Reddit-formatted Post")
_quora_panel = gr.HTML(label="Quora-formatted Post")

demo = gr.Interface(
    title="Article → Reddit & Quora Post Generator",
    description="Enter an article link and your Gemini API key to generate Reddit- and Quora-style posts in HTML.",
    fn=process_url,
    inputs=[_url_box, _api_key_box],
    outputs=[_reddit_panel, _quora_panel],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()