adpro committed on
Commit
74ee06b
·
verified ·
1 Parent(s): 8691072

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -15
app.py CHANGED
@@ -1,9 +1,8 @@
1
  import requests
2
- import bs4
3
- import gradio as gr
4
  from bs4 import BeautifulSoup
5
- from googlesearch import search
6
- import urllib.request
 
7
 
8
  def run_lora(prompt,site,start,end):
9
 
@@ -11,20 +10,42 @@ def run_lora(prompt,site,start,end):
11
 
12
 
13
  # to search
14
-
15
- page = requests.get("https://www.google.com/search?q=inurl:" +site + "+" +prompt + "&tbs=cdr%3A1%2Ccd_min%3A"+start+"%2Ccd_max%3A" + end)
16
- print("https://www.google.com/search?q=inurl:" +site + " " +prompt + "&tbs=cdr%3A1%2Ccd_min%3A"+start+"%2Ccd_max%3A" + end)
17
- soup = BeautifulSoup(page.content)
18
- features="html.parser"
19
- import re
20
  links = soup.findAll("a")
21
  mLink=""
22
- #for link in soup.find_all("a", "html.parser", parse_only=SoupStrainer('a')):
23
- for link in soup.find_all("a",href=re.compile("(?<=/url\?q=)(htt.*://.*)")):
24
- print (re.split(":(?=http)",link["href"].replace("/url?q=","").replace("'","")))
25
- mLink+=str(re.split(":(?=http)",link["href"].replace("/url?q=","").replace("'","")))
26
- return mLink
 
 
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
 
 
 
 
 
 
28
  with gr.Blocks() as app:
29
  gr.HTML("""<html>
30
  <head>
 
1
  import requests
 
 
2
  from bs4 import BeautifulSoup
3
+ from rich import print
4
+ from urllib.parse import urlparse
5
+ from urllib.parse import parse_qs
6
 
7
def run_lora(prompt, site, start, end):
    """Run a date-restricted Google search for ``prompt`` limited to ``site``.

    Args:
        prompt: Free-text search terms.
        site: Value placed after the ``inurl:`` operator in the query.
        start: Lower bound for Google's custom date range (``cd_min``).
            Presumably a date string such as M/D/YYYY -- confirm with caller.
        end: Upper bound for the custom date range (``cd_max``); same format
            assumption as ``start``.

    Returns:
        The list produced by ``extract_results`` for the fetched page.
    """
    # Let requests build and percent-encode the query string instead of
    # concatenating raw user input into the URL; spaces, '&', '/' etc. in
    # prompt/site/start/end would otherwise corrupt the request.
    params = {
        "q": f"inurl:{site} {prompt}",
        "tbs": f"cdr:1,cd_min:{start},cd_max:{end}",
    }
    # Without a timeout requests can block forever on a stalled connection.
    response = requests.get(
        "https://www.google.com/search", params=params, timeout=10
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    # Debug dump of the fetched page, kept from the original.
    print(soup)
    # Hand the parsed results back to the caller instead of discarding them.
    return extract_results(soup)
21
+
22
def extract_results(soup):
    """Collect one parsed entry per search-result element under ``#main``.

    Args:
        soup: Parsed page exposing ``select_one``.

    Returns:
        A list with the ``extract_section`` output for every element matching
        ``.g`` or ``.fP1Qef`` inside ``#main``; an empty list when the page
        has no ``#main`` container.
    """
    main = soup.select_one("#main")
    # select_one returns None when nothing matches; without this guard a page
    # lacking #main would crash on main.select with AttributeError.
    if main is None:
        return []
    return [extract_section(gdiv) for gdiv in main.select('.g, .fP1Qef')]
29
def extract_section(gdiv):
    """Pull the title, target link and snippet out of one result element.

    Args:
        gdiv: Element for a single search result; only its ``select_one``
            method is used.

    Returns:
        A dict with the keys ``'title'``, ``'link'`` and ``'description'``.
        Each value is ``None`` when the corresponding element (or the
        anchor's ``href``) is missing.
    """
    title = gdiv.select_one('h3')
    link = gdiv.select_one('a')
    # '.BNeawe' is a CSS class selector, so it needs select_one(); find()
    # treats its first argument as a tag *name* and would never match.
    description = gdiv.select_one('.BNeawe')
    # Use .get() so an anchor without an href attribute yields None instead
    # of raising KeyError.
    href = link.get('href') if link is not None else None
    return {
        'title': title.text if title is not None else None,
        'link': extract_href(href) if href else None,
        'description': description.text if description is not None else None,
    }
41
def extract_href(href):
    """Return the first ``q`` query-string value of ``href``, or ``None``.

    The ``href`` is split with ``urlparse`` and its query string decoded with
    ``parse_qs``; when no non-blank ``q`` parameter is present the result is
    ``None``.
    """
    targets = parse_qs(urlparse(href).query).get('q')
    return targets[0] if targets else None
47
+
48
+
49
  with gr.Blocks() as app:
50
  gr.HTML("""<html>
51
  <head>