Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -18,7 +18,6 @@ import gradio as gr
|
|
18 |
from bs4 import BeautifulSoup
|
19 |
from fake_useragent import UserAgent
|
20 |
from cleantext import clean
|
21 |
-
import qrcode # Added missing import
|
22 |
|
23 |
# Setup logging
|
24 |
logging.basicConfig(
|
@@ -34,10 +33,22 @@ logger = logging.getLogger(__name__)
|
|
34 |
# Ensure output directories exist
|
35 |
Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
37 |
class URLProcessor:
|
38 |
def __init__(self):
|
39 |
self.session = requests.Session()
|
40 |
-
self.timeout = 10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
self.session.headers.update({
|
42 |
'User-Agent': UserAgent().random,
|
43 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
@@ -46,6 +57,55 @@ class URLProcessor:
|
|
46 |
'Connection': 'keep-alive',
|
47 |
'Upgrade-Insecure-Requests': '1'
|
48 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def advanced_text_cleaning(self, text: str) -> str:
|
51 |
"""Robust text cleaning with version compatibility"""
|
@@ -67,7 +127,7 @@ class URLProcessor:
|
|
67 |
return cleaned_text
|
68 |
except Exception as e:
|
69 |
logger.warning(f"Text cleaning error: {e}. Using fallback method.")
|
70 |
-
text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)
|
71 |
text = text.encode('ascii', 'ignore').decode('ascii')
|
72 |
text = re.sub(r'\s+', ' ', text)
|
73 |
return text.strip()
|
@@ -166,7 +226,7 @@ class URLProcessor:
|
|
166 |
class FileProcessor:
|
167 |
"""Class to handle file processing"""
|
168 |
|
169 |
-
def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):
|
170 |
self.max_file_size = max_file_size
|
171 |
self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
|
172 |
|
@@ -175,7 +235,7 @@ class FileProcessor:
|
|
175 |
try:
|
176 |
mime_type, _ = mimetypes.guess_type(filepath)
|
177 |
return (mime_type and mime_type.startswith('text/')) or \
|
178 |
-
(
|
179 |
except Exception:
|
180 |
return False
|
181 |
|
@@ -220,7 +280,7 @@ class FileProcessor:
|
|
220 |
"source": "file",
|
221 |
"filename": filename,
|
222 |
"content": content,
|
223 |
-
"timestamp": datetime.now().isoformat()
|
224 |
})
|
225 |
except Exception as e:
|
226 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
@@ -259,7 +319,6 @@ class FileProcessor:
|
|
259 |
logger.error(f"File processing error: {e}")
|
260 |
return []
|
261 |
|
262 |
-
@staticmethod
|
263 |
def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
|
264 |
"""Clean and validate JSON data"""
|
265 |
try:
|
@@ -276,7 +335,6 @@ class FileProcessor:
|
|
276 |
logger.error(f"Unexpected error while cleaning JSON: {e}")
|
277 |
return None
|
278 |
|
279 |
-
@staticmethod
|
280 |
def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
|
281 |
"""Generate QR code(s) from data"""
|
282 |
try:
|
@@ -284,7 +342,7 @@ class FileProcessor:
|
|
284 |
output_dir.mkdir(parents=True, exist_ok=True)
|
285 |
|
286 |
if combined:
|
287 |
-
cleaned_data =
|
288 |
if cleaned_data:
|
289 |
qr = qrcode.QRCode(
|
290 |
version=None,
|
@@ -301,10 +359,10 @@ class FileProcessor:
|
|
301 |
img.save(str(output_path))
|
302 |
return [str(output_path)]
|
303 |
else:
|
304 |
-
paths = []
|
305 |
if isinstance(data, list):
|
|
|
306 |
for idx, item in enumerate(data):
|
307 |
-
cleaned_item =
|
308 |
if cleaned_item:
|
309 |
qr = qrcode.QRCode(
|
310 |
version=None,
|
@@ -320,8 +378,9 @@ class FileProcessor:
|
|
320 |
output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
|
321 |
img.save(str(output_path))
|
322 |
paths.append(str(output_path))
|
|
|
323 |
else:
|
324 |
-
cleaned_item =
|
325 |
if cleaned_item:
|
326 |
qr = qrcode.QRCode(
|
327 |
version=None,
|
@@ -333,35 +392,66 @@ class FileProcessor:
|
|
333 |
qr.add_data(json_str)
|
334 |
qr.make(fit=True)
|
335 |
|
336 |
-
img =
|
337 |
output_path = output_dir / f'single_qr_{int(time.time())}.png'
|
338 |
img.save(str(output_path))
|
339 |
-
|
340 |
-
|
|
|
341 |
except Exception as e:
|
342 |
logger.error(f"QR generation error: {e}")
|
343 |
return []
|
344 |
|
345 |
-
def
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
|
355 |
def datachat_trained(data_input: str, query: str) -> str:
|
356 |
"""Handle trained data interaction logic"""
|
357 |
-
data =
|
358 |
if not data:
|
359 |
return "Invalid JSON data provided."
|
360 |
return f"[Trained Mode]\nData: {json.dumps(data, indent=2)}\nQuery: {query}"
|
361 |
|
362 |
def datachat_simple(data_input: str, query: str) -> str:
|
363 |
"""Handle simple chat interaction logic"""
|
364 |
-
data =
|
365 |
if not data:
|
366 |
return "Invalid JSON data provided."
|
367 |
return f"[Chat Mode]\nData: {json.dumps(data, indent=2)}\nQuestion: {query}"
|
@@ -369,14 +459,15 @@ def datachat_simple(data_input: str, query: str) -> str:
|
|
369 |
def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: str, query: str) -> str:
|
370 |
"""Interface for DataChat functionality"""
|
371 |
data = None
|
|
|
372 |
if data_source == "JSON Input":
|
373 |
data = json_input
|
374 |
elif data_source == "QR Code":
|
375 |
-
|
376 |
-
|
377 |
-
data = decoded_data
|
378 |
-
|
379 |
-
return "Invalid QR code data provided"
|
380 |
else:
|
381 |
return "No valid data source selected."
|
382 |
|
@@ -405,29 +496,24 @@ def create_interface():
|
|
405 |
json_input = gr.Textbox(lines=8, label="JSON Data")
|
406 |
qr_image = gr.Image(label="QR Code Image", type="filepath")
|
407 |
query = gr.Textbox(label="Query")
|
|
|
408 |
submit_btn = gr.Button("Submit")
|
409 |
output = gr.Textbox(label="Response")
|
|
|
410 |
submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
|
411 |
|
412 |
with gr.Tab("QR Generator"):
|
413 |
qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
|
414 |
generate_btn = gr.Button("Generate QR")
|
415 |
qr_output = gr.Image(label="Generated QR Code")
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
decode_btn = gr.Button("Decode QR")
|
425 |
-
decoded_output = gr.Textbox(label="Decoded Data")
|
426 |
-
decode_btn.click(
|
427 |
-
lambda x: "\n".join(decode_qr(x)),
|
428 |
-
inputs=qr_upload,
|
429 |
-
outputs=decoded_output
|
430 |
-
)
|
431 |
|
432 |
return interface
|
433 |
|
@@ -435,7 +521,11 @@ def main():
|
|
435 |
mimetypes.init()
|
436 |
Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
|
437 |
interface = create_interface()
|
438 |
-
interface.launch(
|
439 |
-
|
440 |
-
|
441 |
-
|
|
|
|
|
|
|
|
|
|
18 |
from bs4 import BeautifulSoup
|
19 |
from fake_useragent import UserAgent
|
20 |
from cleantext import clean
|
|
|
21 |
|
22 |
# Setup logging
|
23 |
logging.basicConfig(
|
|
|
33 |
# Ensure output directories exist
|
34 |
Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
|
35 |
|
36 |
+
# At the top of the file, remove these imports:
|
37 |
+
# from config import Config
|
38 |
+
# from proxy_handler import ProxyHandler
|
39 |
+
# from robots_handler import RobotsHandler
|
40 |
+
|
41 |
class URLProcessor:
|
42 |
def __init__(self):
|
43 |
self.session = requests.Session()
|
44 |
+
self.timeout = 10
|
45 |
+
self.max_retries = 3
|
46 |
+
self.request_delay = 1.0
|
47 |
+
self.respect_robots = True
|
48 |
+
self.use_proxy = False
|
49 |
+
self.proxy_url = None
|
50 |
+
|
51 |
+
# Update session headers
|
52 |
self.session.headers.update({
|
53 |
'User-Agent': UserAgent().random,
|
54 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
|
57 |
'Connection': 'keep-alive',
|
58 |
'Upgrade-Insecure-Requests': '1'
|
59 |
})
|
60 |
+
|
61 |
+
if self.use_proxy and self.proxy_url:
|
62 |
+
self.session.proxies = {
|
63 |
+
'http': self.proxy_url,
|
64 |
+
'https': self.proxy_url
|
65 |
+
}
|
66 |
+
|
67 |
+
def check_robots_txt(self, url: str) -> bool:
    """Return whether robots.txt permits fetching ``url``.

    Args:
        url: Absolute URL that is about to be requested.

    Returns:
        True when robots.txt checking is disabled (``self.respect_robots``
        is falsy), when the rules allow the URL, or when the check itself
        fails (the method deliberately fails open and logs a warning).
        False when the rules disallow the URL.
    """
    if not self.respect_robots:
        return True

    try:
        from urllib.parse import urlparse
        from urllib.robotparser import RobotFileParser

        parsed_url = urlparse(url)
        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"

        # RobotFileParser.read() downloads through urllib without any
        # timeout, so one unresponsive host could block the caller
        # indefinitely. Fetch through the shared session instead so the
        # configured timeout (and session headers/proxies) apply.
        response = self.session.get(robots_url, timeout=self.timeout)

        rp = RobotFileParser()
        rp.set_url(robots_url)

        # Mirror the status handling RobotFileParser.read() performs.
        status = response.status_code
        if status in (401, 403):
            rp.disallow_all = True
        elif 400 <= status < 500:
            rp.allow_all = True
        elif status < 400:
            rp.parse(response.text.splitlines())
        # For 5xx nothing is parsed, so can_fetch() stays conservative
        # and reports the URL as not fetchable.

        return rp.can_fetch(self.session.headers['User-Agent'], url)
    except Exception as e:
        # Best-effort check: a broken robots.txt lookup must not block
        # the fetch itself.
        logger.warning(f"Error checking robots.txt: {e}")
        return True
|
87 |
+
|
88 |
+
def fetch_content(self, url: str) -> Optional[Dict]:
    """Fetch ``url`` after a robots.txt check, a fixed delay and with retries.

    Args:
        url: Address to fetch. URLs containing ``drive.google.com`` or
            ``calendar.google.com`` are routed to their dedicated handlers;
            everything else goes through the generic HTML fetcher.

    Returns:
        Whatever the selected handler returns, or None when the URL is
        disallowed by robots.txt or every attempt raised.
    """
    allowed = self.check_robots_txt(url)
    if not allowed:
        logger.warning(f"URL {url} is disallowed by robots.txt")
        return None

    # Basic rate limiting: always pause once before the first attempt.
    time.sleep(self.request_delay)

    for attempt in range(self.max_retries):
        attempt_no = attempt + 1
        try:
            if 'drive.google.com' in url:
                handler = self._handle_google_drive
            elif 'calendar.google.com' in url:
                handler = self._handle_google_calendar
            else:
                handler = self._fetch_html_content
            return handler(url)
        except Exception as exc:
            logger.error(f"Attempt {attempt_no} failed: {exc}")
            # Linearly growing back-off, skipped after the final attempt.
            if attempt_no < self.max_retries:
                time.sleep(self.request_delay * attempt_no)

    return None
|
109 |
|
110 |
def advanced_text_cleaning(self, text: str) -> str:
|
111 |
"""Robust text cleaning with version compatibility"""
|
|
|
127 |
return cleaned_text
|
128 |
except Exception as e:
|
129 |
logger.warning(f"Text cleaning error: {e}. Using fallback method.")
|
130 |
+
text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)
|
131 |
text = text.encode('ascii', 'ignore').decode('ascii')
|
132 |
text = re.sub(r'\s+', ' ', text)
|
133 |
return text.strip()
|
|
|
226 |
class FileProcessor:
|
227 |
"""Class to handle file processing"""
|
228 |
|
229 |
+
def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
|
230 |
self.max_file_size = max_file_size
|
231 |
self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
|
232 |
|
|
|
235 |
try:
|
236 |
mime_type, _ = mimetypes.guess_type(filepath)
|
237 |
return (mime_type and mime_type.startswith('text/')) or \
|
238 |
+
(os.path.splitext(filepath)[1].lower() in self.supported_text_extensions)
|
239 |
except Exception:
|
240 |
return False
|
241 |
|
|
|
280 |
"source": "file",
|
281 |
"filename": filename,
|
282 |
"content": content,
|
283 |
+
"timestamp": datetime.now ().isoformat()
|
284 |
})
|
285 |
except Exception as e:
|
286 |
logger.error(f"Error reading file {filename}: {str(e)}")
|
|
|
319 |
logger.error(f"File processing error: {e}")
|
320 |
return []
|
321 |
|
|
|
322 |
def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
|
323 |
"""Clean and validate JSON data"""
|
324 |
try:
|
|
|
335 |
logger.error(f"Unexpected error while cleaning JSON: {e}")
|
336 |
return None
|
337 |
|
|
|
338 |
def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
|
339 |
"""Generate QR code(s) from data"""
|
340 |
try:
|
|
|
342 |
output_dir.mkdir(parents=True, exist_ok=True)
|
343 |
|
344 |
if combined:
|
345 |
+
cleaned_data = clean_json(data)
|
346 |
if cleaned_data:
|
347 |
qr = qrcode.QRCode(
|
348 |
version=None,
|
|
|
359 |
img.save(str(output_path))
|
360 |
return [str(output_path)]
|
361 |
else:
|
|
|
362 |
if isinstance(data, list):
|
363 |
+
paths = []
|
364 |
for idx, item in enumerate(data):
|
365 |
+
cleaned_item = clean_json(item)
|
366 |
if cleaned_item:
|
367 |
qr = qrcode.QRCode(
|
368 |
version=None,
|
|
|
378 |
output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
|
379 |
img.save(str(output_path))
|
380 |
paths.append(str(output_path))
|
381 |
+
return paths
|
382 |
else:
|
383 |
+
cleaned_item = clean_json(data)
|
384 |
if cleaned_item:
|
385 |
qr = qrcode.QRCode(
|
386 |
version=None,
|
|
|
392 |
qr.add_data(json_str)
|
393 |
qr.make(fit=True)
|
394 |
|
395 |
+
img = qrcode.make_image(fill_color="black", back_color="white")
|
396 |
output_path = output_dir / f'single_qr_{int(time.time())}.png'
|
397 |
img.save(str(output_path))
|
398 |
+
return [str(output_path)]
|
399 |
+
|
400 |
+
return []
|
401 |
except Exception as e:
|
402 |
logger.error(f"QR generation error: {e}")
|
403 |
return []
|
404 |
|
405 |
+
def decode_qr_code(image_path: str) -> Optional[str]:
    """Read a single QR code from an image file with ZXing.

    Args:
        image_path: Path of the image file handed to the ZXing reader.

    Returns:
        The decoded text, or None when nothing was decoded or decoding
        raised (the error is logged rather than propagated).

    NOTE(review): the DataChat handler visible in this file unpacks two
    values from this call (``decoded_data, resolution = ...``), while this
    function returns a single value - confirm and align the caller.
    """
    try:
        decoded = zxing.BarCodeReader().decode(image_path)
        text = decoded.parsed if decoded else None
        if text:
            return text
        logger.warning("No QR code found in image")
        return None
    except Exception as exc:
        logger.error(f"QR decoding error: {exc}")
        return None
|
418 |
+
|
419 |
+
def decode_qr(image) -> List[str]:
    """Decode the QR code found in an image using ZXing.

    Args:
        image: Either a filesystem path (``str``) or an array accepted by
            ``Image.fromarray``; arrays are written to a temporary PNG so
            the ZXing reader can open them. ``None`` yields an empty list.

    Returns:
        A single-element list with the decoded text, or an empty list when
        nothing was decoded or an error occurred (errors are logged).
    """
    if image is None:
        return []

    tmp_path = None
    try:
        if isinstance(image, str):
            image_path = image
        else:
            # Reserve a temp file name and close the handle before writing,
            # so saving also works on platforms that forbid reopening an
            # open temporary file.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                tmp_path = tmp.name
            Image.fromarray(image).save(tmp_path)
            image_path = tmp_path

        reader = zxing.BarCodeReader()
        result = reader.decode(image_path)

        if result and result.parsed:
            return [result.parsed]
        return []
    except Exception as e:
        logger.error(f"QR decoding error: {e}")
        return []
    finally:
        # The temp file is created with delete=False, so remove it here;
        # otherwise every decoded upload leaves a PNG behind.
        if tmp_path is not None:
            try:
                Path(tmp_path).unlink()
            except OSError:
                pass
|
444 |
|
445 |
def datachat_trained(data_input: str, query: str) -> str:
    """Build the "trained mode" response for the given data and query.

    Args:
        data_input: Raw JSON text, passed through ``clean_json``.
        query: The user's query, echoed back in the response.

    Returns:
        A formatted response string, or an error message when
        ``clean_json`` yields a falsy result.
    """
    parsed = clean_json(data_input)
    if parsed:
        pretty = json.dumps(parsed, indent=2)
        return f"[Trained Mode]\nData: {pretty}\nQuery: {query}"
    return "Invalid JSON data provided."
|
451 |
|
452 |
def datachat_simple(data_input: str, query: str) -> str:
    """Build the "chat mode" response for the given data and question.

    Args:
        data_input: Raw JSON text, passed through ``clean_json``.
        query: The user's question, echoed back in the response.

    Returns:
        A formatted response string, or an error message when
        ``clean_json`` yields a falsy result.
    """
    parsed = clean_json(data_input)
    if parsed:
        pretty = json.dumps(parsed, indent=2)
        return f"[Chat Mode]\nData: {pretty}\nQuestion: {query}"
    return "Invalid JSON data provided."
|
|
|
459 |
def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: str, query: str) -> str:
|
460 |
"""Interface for DataChat functionality"""
|
461 |
data = None
|
462 |
+
resolution = None # Initialize resolution variable
|
463 |
if data_source == "JSON Input":
|
464 |
data = json_input
|
465 |
elif data_source == "QR Code":
|
466 |
+
try:
|
467 |
+
decoded_data, resolution = decode_qr_code(qr_image) # Get both data and resolution
|
468 |
+
data = decoded_data
|
469 |
+
except Exception as e:
|
470 |
+
return f"Invalid QR code data provided: {e}"
|
471 |
else:
|
472 |
return "No valid data source selected."
|
473 |
|
|
|
496 |
json_input = gr.Textbox(lines=8, label="JSON Data")
|
497 |
qr_image = gr.Image(label="QR Code Image", type="filepath")
|
498 |
query = gr.Textbox(label="Query")
|
499 |
+
|
500 |
submit_btn = gr.Button("Submit")
|
501 |
output = gr.Textbox(label="Response")
|
502 |
+
|
503 |
submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
|
504 |
|
505 |
with gr.Tab("QR Generator"):
|
506 |
qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
|
507 |
generate_btn = gr.Button("Generate QR")
|
508 |
qr_output = gr.Image(label="Generated QR Code")
|
509 |
+
|
510 |
+
def generate_qr(json_data):
    """Create a QR image for the JSON typed into the generator textbox.

    Args:
        json_data: Raw JSON text from the input textbox.

    Returns:
        The path of the first generated QR image, or None when the input
        does not survive ``clean_json`` or no image was produced.
    """
    data = clean_json(json_data)
    if not data:
        return None
    # generate_qr_code returns a list of file paths, but the gr.Image
    # output displays a single image, so hand back just one path.
    paths = generate_qr_code(data)
    return paths[0] if paths else None
|
515 |
+
|
516 |
+
generate_btn.click(generate_qr, qr_input, qr_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
517 |
|
518 |
return interface
|
519 |
|
|
|
521 |
mimetypes.init()
|
522 |
Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
|
523 |
interface = create_interface()
|
524 |
+
interface.launch(
|
525 |
+
server_name="0.0.0.0",
|
526 |
+
server_port=7860,
|
527 |
+
show_error=True,
|
528 |
+
share=False,
|
529 |
+
inbrowser=True,
|
530 |
+
debug=True
|
531 |
+
)
|