broadfield committed on
Commit
297872e
·
verified ·
1 Parent(s): 45e4e02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -25
app.py CHANGED
@@ -11,6 +11,8 @@ import subprocess
11
  pp=PyPiper()
12
  pp.load_mod()
13
 
 
 
14
  def read_pdf(pdf_url):
15
  file_name=f"{uuid.uuid4()}.pdf"
16
  response = requests.get(pdf_url, stream=True)
@@ -19,31 +21,15 @@ def read_pdf(pdf_url):
19
  f.write(response.content)
20
  else:
21
  print(response.status_code)
22
- if not os.path.isdir("./images"):
23
- os.mkdir("./images")
24
- pdf_json=[]
25
  txt_out=""
26
- images = ""
27
  reader = PdfReader(file_name)
28
  number_of_pages = len(reader.pages)
29
  #file_name=str(pdf_path).split("\\")[-1]
30
  for i in range(number_of_pages):
31
  page = reader.pages[i]
32
- images=""
33
- if len(page.images) >0:
34
- for count, image_file_object in enumerate(page.images):
35
- with open( "./images/" + str(count) + image_file_object.name, "wb") as fp:
36
- fp.write(image_file_object.data)
37
- #buffer = io.BytesIO(image_file_object.data)
38
- #images.append({"name":file_name,"page":i,"cnt":count,"image":Image.open(buffer)})
39
- #images.append(str(image_file_object.data))
40
- images += "./images/" + str(i) + image_file_object.name + "\n"
41
- #text = f'{text}\n{page.extract_text()}'
42
- else:
43
- images=""
44
  txt_out+=page.extract_text()
45
- pdf_json.append({"page":i,"text":page.extract_text(),"images":images})
46
- return pdf_json,txt_out
47
  def stream_aud(url):
48
  inp=read_pdf(url)
49
  for ea in inp:
@@ -54,7 +40,6 @@ def stream_aud(url):
54
  def load_html(url):
55
  html=f"""<iframe src="https://docs.google.com/viewer?url={url})&embedded=true" frameborder="0" height="1200px" width="100%"></iframe></div>"""
56
  return html
57
-
58
 
59
  js="""
60
  function start() {
@@ -65,11 +50,11 @@ function start() {
65
 
66
  with gr.Blocks() as app:
67
  a=gr.Audio(streaming=True,autoplay=True)
68
- h=gr.JSON()
69
- t=gr.Textbox(label="url",interactive=False)
70
- bulk=gr.Textbox(label="bulk",interactive=False)
71
 
72
- app.load(None,None,t,js=js)
73
 
74
- t.change(read_pdf,t,[h,bulk]).then(pp.stream_tts,bulk,a)
75
- app.launch()
 
11
  pp=PyPiper()
12
  pp.load_mod()
13
 
14
+
15
+
16
  def read_pdf(pdf_url):
17
  file_name=f"{uuid.uuid4()}.pdf"
18
  response = requests.get(pdf_url, stream=True)
 
21
  f.write(response.content)
22
  else:
23
  print(response.status_code)
 
 
 
24
  txt_out=""
 
25
  reader = PdfReader(file_name)
26
  number_of_pages = len(reader.pages)
27
  #file_name=str(pdf_path).split("\\")[-1]
28
  for i in range(number_of_pages):
29
  page = reader.pages[i]
 
 
 
 
 
 
 
 
 
 
 
 
30
  txt_out+=page.extract_text()
31
+ return txt_out
32
+
33
  def stream_aud(url):
34
  inp=read_pdf(url)
35
  for ea in inp:
 
40
  def load_html(url):
41
  html=f"""<iframe src="https://docs.google.com/viewer?url={url})&embedded=true" frameborder="0" height="1200px" width="100%"></iframe></div>"""
42
  return html
 
43
 
44
  js="""
45
  function start() {
 
50
 
51
  with gr.Blocks() as app:
52
  a=gr.Audio(streaming=True,autoplay=True)
53
+ h=gr.HTML()
54
+ t=gr.Textbox(label="url",interactive=False,visible=False)
55
+ bulk=gr.Textbox(label="bulk",interactive=False,visible=False)
56
 
57
+ app.load(None,None,t,js=js).then(load_html,t,h)
58
 
59
+ t.change(read_pdf,t,bulk).then(pp.stream_tts,bulk,a)
60
+ app.queue(max_threads=40).launch()