mrsk1883 committed on
Commit
db4f512
·
1 Parent(s): cba2ea4

Delete utils.py

Browse files
Files changed (1) hide show
  1. utils.py +0 -58
utils.py DELETED
@@ -1,58 +0,0 @@
1
- from PyPDF2 import PdfReader
2
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
- from gtts import gTTS
4
- import os
5
-
6
- # Download the summarization model and tokenizer
7
- model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
8
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
-
11
- def summarize_and_speak_pdf_abstract(pdf_path):
12
- """
13
- Reads a PDF file, extracts the abstract, summarizes it in one sentence, and generates an audio file of the summary.
14
-
15
- Args:
16
- pdf_path: Path to the PDF file.
17
- """
18
-
19
- # Summarize the abstract
20
- summary = summarize_pdf_abstract(pdf_path)
21
-
22
- # Define language and audio format
23
- language = "en" # Change this to your desired language
24
- audio_format = "mp3"
25
-
26
- # Create the text-to-speech object
27
- tts = gTTS(text=summary, lang=language)
28
-
29
- # Generate the audio file
30
- audio_file_name = f"summary.{audio_format}"
31
- tts.save(audio_file_name)
32
-
33
- print(f"Audio file created: {audio_file_name}")
34
-
35
- # Play the audio file (optional)
36
- # os.system(f"play {audio_file_name}")
37
-
38
-
39
- def summarize_pdf_abstract(pdf_path):
40
- """
41
- Reads a PDF file, extracts the abstract, and summarizes it in one sentence.
42
-
43
- Args:
44
- pdf_path: Path to the PDF file.
45
-
46
- Returns:
47
- A string containing the one-sentence summary of the abstract.
48
- """
49
-
50
- # Read the PDF file
51
- reader = PdfReader(open(pdf_path, "rb"))
52
-
53
- # Extract the abstract
54
- abstract_text = ""
55
- for page in reader.pages:
56
- # Search for keywords like "Abstract" or "Introduction"
57
- if (
58
- "Abstract" in page.extract_text