# app.py — Flipkart laptop scraper (Gradio app for a Hugging Face Space)
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
import chromedriver_autoinstaller
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import csv
import random
import time
import os
# List of user agents to avoid bot detection: one entry is chosen at random
# per browser session so repeated runs don't all present the same fingerprint.
USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
]
# Function to initialize Selenium driver (headless)
def get_driver():
    """Create and return a headless Chrome WebDriver.

    Ensures a chromedriver binary matching the installed Chrome is present
    (the module-level ``chromedriver_autoinstaller`` import alone does
    nothing — ``install()`` must be called), and applies a randomly chosen
    user agent so simple bot checks are less likely to trigger.

    Returns:
        selenium.webdriver.Chrome: a ready-to-use headless driver. The
        caller is responsible for calling ``driver.quit()``.
    """
    # Download/register a matching chromedriver if one is not already on PATH.
    chromedriver_autoinstaller.install()
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # run without a visible window
    chrome_options.add_argument("--no-sandbox")  # required in most containers
    chrome_options.add_argument("--disable-dev-shm-usage")  # avoid /dev/shm limits
    # Apply the spoofed UA at the browser level (requests-style header dicts
    # have no effect on Selenium).
    chrome_options.add_argument(f"user-agent={random.choice(USER_AGENTS)}")
    driver = webdriver.Chrome(options=chrome_options)
    return driver
# Function to scrape Flipkart laptop data
def scrape_flipkart(url):
    """Scrape laptop names, prices and ratings from a Flipkart listing page.

    Args:
        url: A Flipkart laptop category/search URL.

    Returns:
        tuple[str, str | None]: ``(status_message, csv_path)`` on success,
        or ``(error_message, None)`` if anything goes wrong.
    """
    driver = None
    try:
        driver = get_driver()
        driver.get(url)
        time.sleep(5)  # crude wait for JS-rendered content; TODO: prefer WebDriverWait
        soup = BeautifulSoup(driver.page_source, "html.parser")
    except Exception as e:
        return f"Error: {str(e)}", None
    finally:
        # Always release the browser process — the original leaked the
        # driver whenever an exception fired after get_driver().
        if driver is not None:
            driver.quit()

    try:
        products = []
        prices = []
        ratings = []
        # NOTE(review): these are Flipkart's obfuscated CSS class names and
        # change frequently — verify against the live page markup.
        items = soup.find_all("div", class_="_1AtVbE")  # parent div per product
        for item in items:
            name_tag = item.find("div", class_="_4rR01T")
            name = name_tag.text.strip() if name_tag else "N/A"
            price_tag = item.find("div", class_="_30jeq3")
            price = price_tag.text.strip() if price_tag else "N/A"
            rating_tag = item.find("div", class_="_3LWZlK")
            rating = rating_tag.text.strip() if rating_tag else "N/A"
            if name != "N/A":  # skip non-product container divs
                products.append(name)
                prices.append(price)
                ratings.append(rating)

        df = pd.DataFrame({
            "Product Name": products,
            "Price": prices,
            "Rating": ratings,
        })
        csv_path = "flipkart_laptops.csv"
        df.to_csv(csv_path, index=False, encoding="utf-8")
        return f"Scraped {len(products)} laptops successfully!", csv_path
    except Exception as e:
        return f"Error: {str(e)}", None
# Gradio interface: a URL box, a trigger button, and two outputs
# (status text plus a downloadable CSV produced by scrape_flipkart).
with gr.Blocks(title="Flipkart Laptop Scraper") as demo:
    gr.Markdown("# Flipkart Laptop Scraper")
    gr.Markdown("Enter a Flipkart laptop category URL to scrape data and download as CSV.")

    flipkart_url = gr.Textbox(
        label="Flipkart URL",
        placeholder="e.g., https://www.flipkart.com/laptops/pr?sid=6bo,b5g",
    )
    run_button = gr.Button("Scrape Data")
    status_box = gr.Textbox(label="Status")
    csv_download = gr.File(label="Download CSV")

    # Wire the button: scrape_flipkart(url) -> (status message, CSV file path).
    run_button.click(
        fn=scrape_flipkart,
        inputs=flipkart_url,
        outputs=[status_box, csv_download],
    )

demo.launch()