Update app.py
Browse files
app.py
CHANGED
@@ -15,10 +15,129 @@ api=HfApi(token="")
|
|
15 |
filename="test"
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
#############################
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
r = requests.get(f'{save_data}crawl/{filename}.json')
|
23 |
print(f'status code main:: {r.status_code}')
|
24 |
if r.status_code==200:
|
@@ -181,7 +300,7 @@ def link_find(url):
|
|
181 |
return node1,node2
|
182 |
#https://huggingface.co/spaces/Omnibus/crawl
|
183 |
|
184 |
-
def sitemap(url,level):
|
185 |
uri=""
|
186 |
uri0=""
|
187 |
if url != "" and url != None:
|
@@ -219,7 +338,7 @@ def sitemap(url,level):
|
|
219 |
print (e)
|
220 |
except Exception as e:
|
221 |
print (e)
|
222 |
-
uri_key=sort_doc(link_box,8)
|
223 |
######## Save Database ########
|
224 |
uid=uuid.uuid4()
|
225 |
#for ea in list(uri_key.keys()):
|
@@ -277,6 +396,7 @@ def sitemap_OG(url,level):
|
|
277 |
print (e)
|
278 |
return link1
|
279 |
with gr.Blocks() as app:
|
|
|
280 |
with gr.Row():
|
281 |
with gr.Column(scale=3):
|
282 |
with gr.Row():
|
@@ -287,5 +407,5 @@ with gr.Blocks() as app:
|
|
287 |
outp=gr.JSON()
|
288 |
with gr.Column(scale=1):
|
289 |
outmap=gr.JSON()
|
290 |
-
btn.click(sitemap,[inp,level],[outp,outmap,key_json])
|
291 |
app.launch()
|
|
|
15 |
filename="test"
|
16 |
|
17 |
|
18 |
+
def init():
    """Load the previously saved crawl index.

    Requests ``{save_data}crawl/{filename}.json`` (both names are module-level
    values) and prints the HTTP status code.

    Returns:
        The decoded JSON body when the response status is 200, otherwise an
        empty dict.
    """
    r = requests.get(f'{save_data}crawl/{filename}.json')
    print(f'status code main:: {r.status_code}')
    if r.status_code==200:
        lod = json.loads(r.text)
    else:
        # Any non-200 answer is treated as "nothing saved yet".
        lod={}
    return lod
|
29 |
+
|
30 |
+
|
31 |
+
def _advance_counter(counter, base):
    """Add one to a most-significant-first digit list, in place.

    When every digit is already ``base - 1`` the counter grows by one leading
    digit instead of wrapping back to zero, so a key is never handed out twice.
    """
    for idx in range(len(counter) - 1, -1, -1):
        if counter[idx] < base - 1:
            counter[idx] += 1
            return
        counter[idx] = 0
    counter.insert(0, 1)


def sort_doc(in_list,file_state,steps_in=0,control=None):
    """Store the new entries of ``in_list`` under sequential base-62 keys.

    Keys are strings over the alphabet ``0-9a-zA-Z``. With an empty state the
    first key is all zeros, ``steps`` characters wide; otherwise numbering
    continues right after the last key already present in the state.

    Args:
        in_list: Sized sequence of entries to index. An entry that is already
            a value of the state (including one added earlier in this same
            call) is skipped.
        file_state: Dict of existing ``key -> entry`` pairs. It is updated in
            place and returned. ``None`` is treated as an empty dict.
        steps_in: Key width used when the state is empty. When falsy, the
            width is derived from ``len(in_list)`` (number of base-62 digits).
        control: Currently unused; kept so existing calls remain valid.

    Returns:
        The state dict with the new entries added.

    Raises:
        ValueError: If the last existing key contains a character outside the
            key alphabet.
    """
    control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
    control_char=list(control_json['control'])
    char_len=len(control_char)

    # A missing state must not crash the membership test / insert below.
    json_out={} if file_state is None else file_state

    key_cnt=len(in_list)
    print(key_cnt)

    if steps_in:
        steps=steps_in
    else:
        # Count how many base-62 digits are needed to number key_cnt entries.
        n_cnt=0
        nx=key_cnt
        while nx >= 1:
            n_cnt+=1
            nx = nx/char_len
        print("#######")
        print(n_cnt)
        print(nx)
        print("#######")
        steps=n_cnt

    step_cont_box=[]
    if json_out:
        print("LOD")
        # Dicts keep insertion order, so the last key is the newest one.
        last_key=list(json_out.keys())[-1]
        print(last_key)
        for ea_dig in last_key:
            digit=control_json['control'].index(ea_dig)
            print(f'{digit} :: {control_char[digit]}')
            step_cont_box.append(digit)
        print(step_cont_box)
        # Move from the last used key to the first free one.
        _advance_counter(step_cont_box, char_len)
    else:
        print("NOT LOD")
        for ii in range(steps):
            print(ii)
            step_cont_box.append(0)

    big_cnt=0
    for ea in in_list:
        # The values view is live, so entries added during this call count too.
        if ea in json_out.values():
            continue
        out_js=""
        for j in step_cont_box:
            print(j)
            out_js = out_js+control_char[j]
        json_out[out_js]=ea
        _advance_counter(step_cont_box, char_len)
        big_cnt+=1

    # Reported only when every entry of a non-empty in_list was new.
    if key_cnt and big_cnt==key_cnt:
        print("DONE")
    return json_out
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
|
134 |
#############################
|
135 |
|
136 |
+
|
137 |
+
|
138 |
+
|
139 |
+
|
140 |
+
def sort_doc_OG(in_list,steps_in=0,control=None):
|
141 |
r = requests.get(f'{save_data}crawl/{filename}.json')
|
142 |
print(f'status code main:: {r.status_code}')
|
143 |
if r.status_code==200:
|
|
|
300 |
return node1,node2
|
301 |
#https://huggingface.co/spaces/Omnibus/crawl
|
302 |
|
303 |
+
def sitemap(url,file_state,level):
|
304 |
uri=""
|
305 |
uri0=""
|
306 |
if url != "" and url != None:
|
|
|
338 |
print (e)
|
339 |
except Exception as e:
|
340 |
print (e)
|
341 |
+
uri_key=sort_doc(link_box,file_state,8)
|
342 |
######## Save Database ########
|
343 |
uid=uuid.uuid4()
|
344 |
#for ea in list(uri_key.keys()):
|
|
|
396 |
print (e)
|
397 |
return link1
|
398 |
with gr.Blocks() as app:
|
399 |
+
file_state=gr.State()
|
400 |
with gr.Row():
|
401 |
with gr.Column(scale=3):
|
402 |
with gr.Row():
|
|
|
407 |
outp=gr.JSON()
|
408 |
with gr.Column(scale=1):
|
409 |
outmap=gr.JSON()
|
410 |
+
btn.click(init,None,file_state).then(sitemap,[inp,file_state,level],[outp,outmap,key_json])
|
411 |
app.launch()
|