import requests
import re
from bs4 import BeautifulSoup
import os
import json

class ETSIDocFinder:
    def __init__(self):
        self.main_ftp_url = "https://docbox.etsi.org/SET"
        # Log in once at construction time and reuse the authenticated session.
        req_data = self.connect()
        print(req_data['message'])
        self.session = req_data['session']
    
    def connect(self):
        session = requests.Session()
        # NB: verify=False disables TLS certificate checks for the ETSI portal.
        req = session.post(
            "https://portal.etsi.org/ETSIPages/LoginEOL.ashx",
            verify=False,
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"},
            data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}),
        )
        if req.text == "Failed":
            return {"error": True, "session": session, "message": "Login failed! Check your credentials"}
        return {"error": False, "session": session, "message": "Login successful"}
    
    def get_workgroup(self, doc: str):
        if doc.startswith(("SETREQ", "SCPREQ")):
            main_tsg = "SET-WG-R"
        elif doc.startswith(("SETTEC", "SCPTEC")):
            main_tsg = "SET-WG-T"
        elif doc.startswith(("SET", "SCP")):
            main_tsg = "SET"
        else:
            return None, None, None
        match = re.search(r'\(([^)]+)\)', doc)  # meeting token, e.g. "(25)"
        if match is None:
            return None, None, None
        workgroup = "20" + match.group(1)  # "(25)" -> "2025"
        return main_tsg, workgroup, doc

    def find_workgroup_url(self, main_tsg, workgroup):
        # Scan the 05-CONTRIBUTIONS listing for a folder whose name contains
        # the expanded meeting token (e.g. "2025"); fall back to a direct
        # path guess if nothing matches.
        response = self.session.get(f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS", verify=False)
        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all("tr"):
            link = item.find("a")
            if link and workgroup in link.get_text():
                return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{link.get_text()}"
        return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{workgroup}"
    
    def get_docs_from_url(self, url):
        try:
            response = self.session.get(url, verify=False, timeout=15)
            soup = BeautifulSoup(response.text, "html.parser")
            return [item.get_text() for item in soup.select("tr td a")]
        except Exception as e:
            print(f"Error accessing {url}: {e}")
            return []
    
    def search_document(self, doc_id: str):
        original = doc_id

        main_tsg, workgroup, doc = self.get_workgroup(doc_id)
        urls = []
        if main_tsg:
            wg_url = self.find_workgroup_url(main_tsg, workgroup)
            print(wg_url)
            if wg_url:
                files = self.get_docs_from_url(wg_url)
                print(files)
                for f in files:
                    # Match the identifier case-insensitively against each filename.
                    if doc.lower() in f.lower() or original in f:
                        print(f)
                        urls.append(f"{wg_url}/{f}")
        if len(urls) == 1:
            return urls[0]
        if len(urls) > 1:
            # Several matches: keep the second-to-last entry.
            return urls[-2]
        return f"Document {doc_id} not found"

class ETSISpecFinder:
    def __init__(self):
        self.main_url = "https://www.etsi.org/deliver/etsi_ts"    # Technical Specifications
        self.second_url = "https://www.etsi.org/deliver/etsi_tr"  # Technical Reports
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}
        
    def get_spec_path(self, doc_id: str):
        # Split "103 666-2" into position "103666" and part "02".
        if "-" in doc_id:
            position, part = doc_id.split("-", 1)
        else:
            position, part = doc_id, None

        position = position.replace(" ", "")
        if part:
            part = part.zfill(2)  # zero-pad single-digit part numbers
        spec_folder = position + part if part is not None else position
        # Deliverables are grouped in ranges of 100, e.g. 103666 -> 103600_103699.
        base = int(position) - int(position) % 100
        return f"{base}_{base + 99}/{spec_folder}"

    def get_docs_from_url(self, url):
        try:
            # Send the browser User-Agent defined in __init__.
            response = requests.get(url, verify=False, timeout=15, headers=self.headers)
            soup = BeautifulSoup(response.text, "html.parser")
            # Drop the first link (typically the parent-directory entry).
            return [item.get_text() for item in soup.find_all("a")][1:]
        except Exception as e:
            print(f"Error accessing {url}: {e}")
            return []
    
    def search_document(self, doc_id: str):
        # Example: 103 666[-2 opt]
        original = doc_id

        url = f"{self.main_url}/{self.get_spec_path(original)}/"
        url2 = f"{self.second_url}/{self.get_spec_path(original)}/"
        print(url)
        print(url2)

        # Try Technical Specifications first, then fall back to Technical Reports.
        for base_url in (url, url2):
            releases = self.get_docs_from_url(base_url)
            if not releases:
                continue
            # Take the latest release folder and return its first PDF.
            files = self.get_docs_from_url(base_url + releases[-1])
            for f in files:
                if f.endswith(".pdf"):
                    return base_url + releases[-1] + "/" + f

        return f"Specification {doc_id} not found"