Spaces:
Runtime error
Runtime error
| import requests | |
| from pathlib import Path | |
| from utils.enver import enver | |
| from utils.logger import logger | |
| from networks.filepath_converter import UrlToFilepathConverter | |
| class HTMLFetcher: | |
| def __init__(self): | |
| self.enver = enver | |
| self.enver.set_envs(proxies=True) | |
| def send_request(self): | |
| logger.note(f"Fetching: [{self.url}]") | |
| self.request_response = requests.get( | |
| url=self.url, | |
| headers={ | |
| "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62", | |
| }, | |
| proxies=self.enver.requests_proxies, | |
| ) | |
| def save_response(self): | |
| self.output_path = UrlToFilepathConverter().convert(self.url) | |
| if not self.output_path.exists(): | |
| self.output_path.parent.mkdir(parents=True, exist_ok=True) | |
| logger.success(f"Saving to: [{self.output_path}]") | |
| with open(self.output_path, "wb") as wf: | |
| wf.write(self.request_response.content) | |
| def fetch(self, url): | |
| self.url = url | |
| self.send_request() | |
| self.save_response() | |
| return self.output_path | |
| if __name__ == "__main__": | |
| url = ( | |
| # "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename" | |
| # "https://www.liaoxuefeng.com/wiki/1016959663602400/1017495723838528" | |
| "https://docs.python.org/zh-cn/3/tutorial/interpreter.html" | |
| ) | |
| fetcher = HTMLFetcher() | |
| fetcher.fetch(url) | |