Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	🚸 🎨
Browse files
Signed-off-by: peter szemraj <[email protected]>
- app.py +1 -0
- summarize.py +2 -0
- utils.py +3 -3
    	
        app.py
    CHANGED
    
    | @@ -334,6 +334,7 @@ if __name__ == "__main__": | |
| 334 | 
             
                                uploaded_file = gr.File(
         | 
| 335 | 
             
                                    label="File Upload",
         | 
| 336 | 
             
                                    file_count="single",
         | 
|  | |
| 337 | 
             
                                    type="file",
         | 
| 338 | 
             
                                )
         | 
| 339 | 
             
                        with gr.Row():
         | 
|  | |
| 334 | 
             
                                uploaded_file = gr.File(
         | 
| 335 | 
             
                                    label="File Upload",
         | 
| 336 | 
             
                                    file_count="single",
         | 
| 337 | 
            +
                                    file_types=[".txt", ".md", ".pdf"],
         | 
| 338 | 
             
                                    type="file",
         | 
| 339 | 
             
                                )
         | 
| 340 | 
             
                        with gr.Row():
         | 
    	
        summarize.py
    CHANGED
    
    | @@ -114,7 +114,9 @@ def summarize_via_tokenbatches( | |
| 114 | 
             
                    tokenizer (): the tokenizer to use for summarization
         | 
| 115 | 
             
                    batch_length (int, optional): the length of each batch. Defaults to 2048.
         | 
| 116 | 
             
                    batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
         | 
|  | |
| 117 |  | 
|  | |
| 118 | 
             
                Returns:
         | 
| 119 | 
             
                    list: a list of dictionaries containing the input tokens, the summary, and the summary score
         | 
| 120 | 
             
                """
         | 
|  | |
| 114 | 
             
                    tokenizer (): the tokenizer to use for summarization
         | 
| 115 | 
             
                    batch_length (int, optional): the length of each batch. Defaults to 2048.
         | 
| 116 | 
             
                    batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
         | 
| 117 | 
            +
                    min_batch_length (int, optional): the minimum length of each batch. Defaults to 512.
         | 
| 118 |  | 
| 119 | 
            +
                    **kwargs: any additional arguments to pass to the model for inference
         | 
| 120 | 
             
                Returns:
         | 
| 121 | 
             
                    list: a list of dictionaries containing the input tokens, the summary, and the summary score
         | 
| 122 | 
             
                """
         | 
    	
        utils.py
    CHANGED
    
    | @@ -156,7 +156,7 @@ def extract_keywords( | |
| 156 | 
             
                for keyword in keywords:
         | 
| 157 | 
             
                    if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
         | 
| 158 | 
             
                        final_keywords.append(keyword)
         | 
| 159 | 
            -
                logger. | 
| 160 | 
             
                return final_keywords
         | 
| 161 |  | 
| 162 |  | 
| @@ -178,9 +178,9 @@ def saves_summary( | |
| 178 | 
             
                full_summary = "\n".join(sum_text)
         | 
| 179 |  | 
| 180 | 
             
                keywords = "_".join(extract_keywords(full_summary))
         | 
| 181 | 
            -
                logger. | 
| 182 | 
             
                outpath = (
         | 
| 183 | 
            -
                    Path.cwd() / f"document_summary_{get_timestamp()} | 
| 184 | 
             
                    if outpath is None
         | 
| 185 | 
             
                    else Path(outpath)
         | 
| 186 | 
             
                )
         | 
|  | |
| 156 | 
             
                for keyword in keywords:
         | 
| 157 | 
             
                    if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
         | 
| 158 | 
             
                        final_keywords.append(keyword)
         | 
| 159 | 
            +
                logger.debug(f"Keywords (final):\t{final_keywords}")
         | 
| 160 | 
             
                return final_keywords
         | 
| 161 |  | 
| 162 |  | 
|  | |
| 178 | 
             
                full_summary = "\n".join(sum_text)
         | 
| 179 |  | 
| 180 | 
             
                keywords = "_".join(extract_keywords(full_summary))
         | 
| 181 | 
            +
                logger.debug(f"kw:\t{keywords}")
         | 
| 182 | 
             
                outpath = (
         | 
| 183 | 
            +
                    Path.cwd() / f"document_summary_{keywords}_{get_timestamp()}.txt"
         | 
| 184 | 
             
                    if outpath is None
         | 
| 185 | 
             
                    else Path(outpath)
         | 
| 186 | 
             
                )
         | 
