Hyphonical committed on
Commit
3fa127c
·
1 Parent(s): cac9909

✨ Update PurifyHtml function: change model loading to use 'jinaai/ReaderLM-v2' for improved performance.

Browse files
Files changed (2) hide show
  1. Purify.py +2 -2
  2. app.py +1 -1
Purify.py CHANGED
@@ -86,8 +86,8 @@ def PurifyHtml(Url: str) -> str: # type: ignore
86
  for Line in Summary:
87
  print(Line)
88
 
89
- Tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-readerLM-1.5B')
90
- Model = AutoModelForCausalLM.from_pretrained('jinaai/jina-readerLM-1.5B', torch_dtype=torch.float32, device_map='cpu')
91
 
92
  Prompt = f'Convert this HTML to markdown:\n\n{CleanedHtml}'
93
  Inputs = Tokenizer(Prompt, return_tensors='pt', truncation=True, max_length=8192)
 
86
  for Line in Summary:
87
  print(Line)
88
 
89
+ Tokenizer = AutoTokenizer.from_pretrained('jinaai/ReaderLM-v2')
90
+ Model = AutoModelForCausalLM.from_pretrained('jinaai/ReaderLM-v2', torch_dtype=torch.float32, device_map='cpu')
91
 
92
  Prompt = f'Convert this HTML to markdown:\n\n{CleanedHtml}'
93
  Inputs = Tokenizer(Prompt, return_tensors='pt', truncation=True, max_length=8192)
app.py CHANGED
@@ -419,7 +419,7 @@ with gradio.Blocks(
419
 
420
  with gradio.TabItem('Web Scraping & Purification 🌐'):
421
  with gradio.Group():
422
- PurifyInput = gradio.Textbox(label='URL to Purify 🌐', placeholder='Enter URL to fetch and purify HTML', lines=1, max_lines=1)
423
  PurifyOutput = gradio.Text(label='Purified HTML Content 📝', interactive=False)
424
  PurifyBtn = gradio.Button('Purify HTML 🧹', variant='primary')
425
  PurifyBtn.click(Purify, inputs=PurifyInput, outputs=PurifyOutput)
 
419
 
420
  with gradio.TabItem('Web Scraping & Purification 🌐'):
421
  with gradio.Group():
422
+ PurifyInput = gradio.Textbox(label='URL to Purify 🌐', placeholder='Enter URL to fetch and purify HTML (e.g., https://huggingface.co)', lines=1, max_lines=1)
423
  PurifyOutput = gradio.Text(label='Purified HTML Content 📝', interactive=False)
424
  PurifyBtn = gradio.Button('Purify HTML 🧹', variant='primary')
425
  PurifyBtn.click(Purify, inputs=PurifyInput, outputs=PurifyOutput)