Pedro Bento committed on
Commit
6de4240
·
1 Parent(s): 3e2bf63

Improved requests so that it can use different methods

Browse files
Files changed (2) hide show
  1. app.py +5 -4
  2. tdagent/tools/get_url_content.py +65 -25
app.py CHANGED
@@ -11,7 +11,7 @@ from tdagent.tools.get_domain_information import (
11
  geo_location_tool,
12
  scrap_subdomains_tool,
13
  )
14
- from tdagent.tools.get_url_content import gr_get_url_http_content
15
  from tdagent.tools.internal_company_user_search import gr_internal_company
16
  from tdagent.tools.lookup_company_cloud_account_information import (
17
  gr_lookup_company_cloud_account_information,
@@ -37,7 +37,7 @@ def _read_markdown_body_as_html(path: str = "README.md") -> str:
37
  if lines and lines[0].strip() == "---":
38
  for i in range(1, len(lines)):
39
  if lines[i].strip() == "---":
40
- lines = lines[i + 1 :] # skip metadata block
41
  break
42
 
43
  markdown_body = "".join(lines).strip()
@@ -52,7 +52,7 @@ class ToolInfo(NamedTuple):
52
 
53
 
54
  TOOLS = (
55
- ToolInfo("Get URL Content", gr_get_url_http_content),
56
  ToolInfo("Query AbuseIPDB", gr_query_abuseipdb),
57
  # Whois does not work from Spaces (port 43 blocked)
58
  # ToolInfo("Query WHOIS", gr_query_whois),
@@ -125,7 +125,8 @@ with (
125
  """,
126
  )
127
  gr.HTML(
128
- """<iframe width="560" height="315" src="https://youtube.com/embed/c7Yg_jOD6J0" frameborder="0" allowfullscreen></iframe>""", # noqa: E501
 
129
  )
130
 
131
  if __name__ == "__main__":
 
11
  geo_location_tool,
12
  scrap_subdomains_tool,
13
  )
14
+ from tdagent.tools.get_url_content import gr_get_url_http_content, gr_make_http_request
15
  from tdagent.tools.internal_company_user_search import gr_internal_company
16
  from tdagent.tools.lookup_company_cloud_account_information import (
17
  gr_lookup_company_cloud_account_information,
 
37
  if lines and lines[0].strip() == "---":
38
  for i in range(1, len(lines)):
39
  if lines[i].strip() == "---":
40
+ lines = lines[i + 1:] # skip metadata block
41
  break
42
 
43
  markdown_body = "".join(lines).strip()
 
52
 
53
 
54
  TOOLS = (
55
+ ToolInfo("Make an HTTP request to a URL with specified method and parameters", gr_make_http_request),
56
  ToolInfo("Query AbuseIPDB", gr_query_abuseipdb),
57
  # Whois does not work from Spaces (port 43 blocked)
58
  # ToolInfo("Query WHOIS", gr_query_whois),
 
125
  """,
126
  )
127
  gr.HTML(
128
+ """<iframe width="560" height="315" src="https://youtube.com/embed/c7Yg_jOD6J0" frameborder="0" allowfullscreen></iframe>""",
129
+ # noqa: E501
130
  )
131
 
132
  if __name__ == "__main__":
tdagent/tools/get_url_content.py CHANGED
@@ -1,66 +1,106 @@
1
  from collections.abc import Sequence
 
2
 
3
  import gradio as gr
4
  import requests
5
 
6
  from tdagent.constants import HttpContentType
7
 
 
 
8
 
9
- def get_url_http_content(
 
10
  url: str,
 
11
  content_type: Sequence[HttpContentType] | None = None,
 
12
  timeout: int = 30,
 
13
  ) -> tuple[str, str]:
14
- """Get the content of a URL using an HTTP GET request.
15
 
16
  Args:
17
- url: The URL to fetch the content from.
 
18
  content_type: If given it should contain the expected
19
- content types in the response body. The server may
20
- not honor the requested content types.
21
  timeout: Request timeout in seconds. Defaults to 30.
 
22
 
23
  Returns:
24
  A pair of strings (content, error_message). If there is an
25
- error getting content from the URL the `content` will be
26
- empty and `error_message` will, usually, contain the error
27
- cause. Otherwise, `error_message` will be empty and the
28
- content will be filled with data fetched from the URL.
29
  """
30
- headers = {}
31
 
32
  if content_type:
33
  headers["Accept"] = ",".join(content_type)
34
 
 
 
 
 
 
 
 
 
 
 
 
35
  try:
36
- response = requests.get(
37
- url,
38
- headers=headers,
39
- timeout=timeout,
40
- )
41
  except requests.exceptions.MissingSchema as err:
42
  return "", str(err)
 
 
43
 
44
  try:
45
  response.raise_for_status()
46
  except requests.HTTPError as err:
47
  return "", str(err)
48
 
 
 
 
 
49
  return response.text, ""
50
 
51
 
52
- gr_get_url_http_content = gr.Interface(
53
- fn=get_url_http_content,
54
- inputs=[gr.Textbox(label="url"), gr.Textbox(label="content type")],
55
- outputs=gr.Text(label="content"),
56
- title="Get the content of a URL using an HTTP GET request.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  description=(
58
- "Get the content of a URL in one of the specified content types."
59
- " The server may not honor the content type and if it fails the"
60
- " reason should also be returned with the corresponding HTTP"
61
- " error code. Be wary of retrieving the content of malicious urls."
 
62
  ),
63
  examples=[
64
- ["https://google.com", "html"],
 
 
65
  ],
66
  )
 
1
  from collections.abc import Sequence
2
+ from typing import Literal, Optional, Dict, Any
3
 
4
  import gradio as gr
5
  import requests
6
 
7
  from tdagent.constants import HttpContentType
8
 
9
+ # Define valid HTTP methods
10
+ HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD"]
11
 
12
+
13
def make_http_request(
    url: str,
    method: HttpMethod = "GET",
    content_type: Sequence[HttpContentType] | str | None = None,
    body: Optional[str] = None,
    timeout: int = 30,
    custom_headers: Dict[str, str] | str | None = None,
) -> tuple[str, str]:
    """Make an HTTP request to a URL with specified method and parameters.

    Args:
        url: The URL to make the request to.
        method: HTTP method to use (GET, POST, PUT, DELETE, PATCH, HEAD).
            Compared case-insensitively.
        content_type: Expected content types of the response body, sent
            as the ``Accept`` header. Either a sequence of content types
            or a single comma-separated string (the form a text input
            produces). The server may not honor it.
        body: Request body for methods that support it (POST, PUT, PATCH).
            Ignored for every other method and when empty.
        timeout: Request timeout in seconds. Defaults to 30.
        custom_headers: Additional headers to include in the request,
            either as a mapping or as a JSON object encoded in a string.
            The argument itself is never modified.

    Returns:
        A pair of strings (content, error_message). If there is an
        error making the request, the `content` will be empty and
        `error_message` will contain the error cause. Otherwise,
        `error_message` will be empty and the content will be filled
        with data fetched from the URL. For HEAD requests the content
        is the string form of the response headers.
    """
    # Imported locally so the block does not depend on a new file-level import.
    import json

    # Build a private headers dict: text inputs deliver JSON as a string,
    # and a caller-supplied dict must not be mutated when "Accept" is added.
    if isinstance(custom_headers, str):
        raw_headers = custom_headers.strip()
        if raw_headers:
            try:
                parsed_headers = json.loads(raw_headers)
            except json.JSONDecodeError as err:
                return "", f"Invalid custom headers JSON: {err}"
            if not isinstance(parsed_headers, dict):
                return "", "Custom headers must be a JSON object"
            # Header values must be strings for the HTTP client.
            headers = {str(k): str(v) for k, v in parsed_headers.items()}
        else:
            headers = {}
    else:
        headers = dict(custom_headers or {})

    # A plain string must be split on commas; joining it directly would
    # interleave commas between its individual characters.
    if isinstance(content_type, str):
        accepted = [part.strip() for part in content_type.split(",") if part.strip()]
    else:
        accepted = list(content_type or ())
    if accepted:
        headers["Accept"] = ",".join(accepted)

    # Normalize so the comparisons below agree with what is sent on the wire.
    method = method.upper()
    sends_body = method in ("POST", "PUT", "PATCH") and bool(body)

    try:
        response = requests.request(
            method,
            url=url,
            headers=headers,
            timeout=timeout,
            data=body if sends_body else None,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of MissingSchema, HTTPError,
        # timeouts and connection errors, so one handler covers them all.
        return "", str(err)

    # HEAD responses carry no body; report the headers instead.
    if method == "HEAD":
        return str(dict(response.headers)), ""

    return response.text, ""
72
 
73
 
74
# Gradio front-end for `make_http_request`. The input components are listed
# in the same order as the function's positional parameters:
# url, method, content_type, body, timeout, custom_headers.
gr_make_http_request = gr.Interface(
    fn=make_http_request,
    title="Make HTTP Requests",
    description=(
        "Make HTTP requests with different methods and parameters. "
        "Supports GET, POST, PUT, DELETE, PATCH, and HEAD methods. "
        "For POST, PUT, and PATCH requests, you can include a request body. "
        "Custom headers can be added in JSON format. "
        "Be cautious when accessing unknown URLs."
    ),
    inputs=[
        gr.Textbox(label="URL"),
        gr.Dropdown(
            label="HTTP Method",
            choices=["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD"],
            value="GET",
        ),
        gr.Textbox(label="Content Type (comma-separated)"),
        gr.Textbox(label="Request Body (for POST/PUT/PATCH)", lines=3),
        gr.Number(label="Timeout (seconds)", value=30),
        gr.Textbox(
            label="Custom Headers (JSON format)",
            placeholder='{"Authorization": "Bearer token"}',
        ),
    ],
    outputs=gr.Text(label="Response"),
    # Each example row supplies one value per input component, in order.
    examples=[
        ["https://google.com", "GET", "text/html", "", 30, ""],
        [
            "https://api.example.com/data",
            "POST",
            "application/json",
            '{"key": "value"}',
            30,
            '{"Authorization": "Bearer token"}',
        ],
    ],
)