# app.py
import random
import time

import gradio as gr
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# List of user agents, rotated to avoid bot detection
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
]

# Function to initialize a headless Selenium Chrome driver
def get_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run without a visible browser window
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # Apply a rotating User-Agent to the browser itself to reduce bot detection
    chrome_options.add_argument(f"--user-agent={random.choice(USER_AGENTS)}")
    driver = webdriver.Chrome(options=chrome_options)
    return driver
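
# Optional sketch: an explicit wait as a sturdier alternative to the fixed
# time.sleep(5) used below. It assumes the "div._1AtVbE" product-card selector
# from scrape_flipkart; Flipkart rotates class names, so adjust as needed.
def wait_for_listings(driver, timeout=10):
    """Block until at least one product card is present, or raise TimeoutException."""
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div._1AtVbE"))
    )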

# Function to scrape laptop listings from a Flipkart category page
def scrape_flipkart(url):
    driver = None
    try:
        # Set up the Selenium driver and load the page
        driver = get_driver()
        driver.get(url)
        time.sleep(5)  # Wait for JavaScript to render the listings

        # Parse the rendered page source with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Lists to store scraped data
        products = []
        prices = []
        ratings = []

        # Find laptop items (adjust class names when Flipkart's HTML structure changes)
        items = soup.find_all("div", class_="_1AtVbE")  # Parent div for each product
        for item in items:
            # Product name
            name_tag = item.find("div", class_="_4rR01T")
            name = name_tag.text.strip() if name_tag else "N/A"
            # Price
            price_tag = item.find("div", class_="_30jeq3")
            price = price_tag.text.strip() if price_tag else "N/A"
            # Rating
            rating_tag = item.find("div", class_="_3LWZlK")
            rating = rating_tag.text.strip() if rating_tag else "N/A"
            if name != "N/A":  # Only append entries with a product name
                products.append(name)
                prices.append(price)
                ratings.append(rating)

        # Build a DataFrame and save it as CSV for download
        df = pd.DataFrame({
            "Product Name": products,
            "Price": prices,
            "Rating": ratings
        })
        csv_path = "flipkart_laptops.csv"
        df.to_csv(csv_path, index=False, encoding="utf-8")
        return f"Scraped {len(products)} laptops successfully!", csv_path
    except Exception as e:
        return f"Error: {e}", None
    finally:
        # Always release the browser, even if scraping fails partway
        if driver is not None:
            driver.quit()

# Gradio interface
with gr.Blocks(title="Flipkart Laptop Scraper") as demo:
    gr.Markdown("# Flipkart Laptop Scraper")
    gr.Markdown("Enter a Flipkart laptop category URL to scrape data and download as CSV.")
    url_input = gr.Textbox(
        label="Flipkart URL",
        placeholder="e.g., https://www.flipkart.com/laptops/pr?sid=6bo,b5g"
    )
    scrape_btn = gr.Button("Scrape Data")
    output_text = gr.Textbox(label="Status")
    output_file = gr.File(label="Download CSV")
    scrape_btn.click(
        fn=scrape_flipkart,
        inputs=url_input,
        outputs=[output_text, output_file]
    )

demo.launch()
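
# Optional smoke test, kept commented out so the app boots straight into the
# UI (demo.launch() blocks above). The URL is the placeholder example, not a
# verified live category page:
# status, csv_path = scrape_flipkart("https://www.flipkart.com/laptops/pr?sid=6bo,b5g")
# print(status, csv_path)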