Omnibus committed on
Commit
06a9242
·
verified ·
1 Parent(s): a0dac77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -12,17 +12,14 @@ username="omnibus"
12
  dataset_name="tmp"
13
  save_data=f'https://huggingface.co/datasets/{username}/{dataset_name}/raw/main/'
14
  api=HfApi(token="")
15
- filename="test"
16
-
17
 
18
  def init():
19
  r = requests.get(f'{save_data}crawl/{filename}.json')
20
  print(f'status code main:: {r.status_code}')
21
  if r.status_code==200:
22
  lod = json.loads(r.text)
23
- #print(f'lod:: {lod}')
24
- #lod[0]['comment']=lod[0]['comment']+1
25
- #lod[0]['comment_list'].append({'user':persona[persona2]['name'],'datetime':'','comment':output,'reply_list':[]})
26
  else:
27
  lod={}
28
  return lod
@@ -338,7 +335,14 @@ def sitemap(url,file_state,level):
338
  print (e)
339
  except Exception as e:
340
  print (e)
341
- uri_key=sort_doc(link_box,file_state,8)
 
 
 
 
 
 
 
342
  ######## Save Database ########
343
  uid=uuid.uuid4()
344
  #for ea in list(uri_key.keys()):
 
12
  dataset_name="tmp"
13
  save_data=f'https://huggingface.co/datasets/{username}/{dataset_name}/raw/main/'
14
  api=HfApi(token="")
15
+ filename="urls"
16
+ filename2="pages"
17
 
18
  def init():
19
  r = requests.get(f'{save_data}crawl/{filename}.json')
20
  print(f'status code main:: {r.status_code}')
21
  if r.status_code==200:
22
  lod = json.loads(r.text)
 
 
 
23
  else:
24
  lod={}
25
  return lod
 
335
  print (e)
336
  except Exception as e:
337
  print (e)
338
+ #url_page=[]
339
+ url_front=[]
340
+ for ea_link in link2['TREE']:
341
+ url_list=ea_link.split("/")
342
+ url_front.append(url_list[0])
343
+ print(f'URL_FRONT:: {url_front}')
344
+ #url_key=sort
345
+ uri_key=sort_doc(url_front,file_state,8)
346
  ######## Save Database ########
347
  uid=uuid.uuid4()
348
  #for ea in list(uri_key.keys()):