ahmed-eisa committed on
Commit
6d73c15
·
1 Parent(s): a537e5e

talk to web page app

Browse files
Files changed (6) hide show
  1. dependencies.py +15 -0
  2. main.py +10 -6
  3. requirements.txt +4 -1
  4. schemas.py +56 -0
  5. scrapper.py +36 -0
  6. utils.py +2 -1
dependencies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import Body
from loguru import logger

from schemas import TextModelRequest
# The scraping helpers are added by this commit as scrapper.py (double "p"),
# so the module has to be imported under that exact name.
from scrapper import extract_urls, fetch_all


async def get_urls_content(body: TextModelRequest) -> str:
    """Return the scraped text of the URLs mentioned in the request prompt.

    Intended to be used as a FastAPI dependency.

    Args:
        body: Validated text-generation request; only ``body.prompt`` is read.

    Returns:
        Whatever ``fetch_all`` returns for the URLs found in the prompt, or an
        empty string when the prompt contains no URL or fetching raised.
    """
    urls = extract_urls(body.prompt)
    if not urls:
        return ""
    try:
        return await fetch_all(urls)
    except Exception as e:
        # Best effort: a scraping failure must not fail the whole request, so
        # the error is logged and the prompt is used without extra context.
        logger.warning(f"Failed to fetch one or several URls - Error: {e}")
        return ""
main.py CHANGED
@@ -1,5 +1,5 @@
1
  # main.py
2
- from fastapi import FastAPI,status,Response,Request
3
  from fastapi.responses import StreamingResponse,FileResponse
4
  from models import load_text_model,generate_text,load_audio_model,generate_audio,load_image_model, generate_image
5
  from schemas import VoicePresets
@@ -10,12 +10,15 @@ from uuid import uuid4
10
  import time
11
  from datetime import datetime, timezone
12
  import csv
 
 
13
 
14
  models = {}
15
 
16
  @asynccontextmanager
17
  async def lifespan(_: FastAPI) -> AsyncIterator[None]:
18
  # models["text2image"] = load_image_model()
 
19
  yield
20
  models.clear()
21
 
@@ -59,16 +62,17 @@ async def monitor_service(
59
 
60
 
61
  # app = FastAPI()
62
-
63
  @app.get("/")
64
  def root_controller():
65
  return {"status": "healthy"}
66
 
67
  @app.get("/generate/text")
68
- def serve_language_model_controller(prompt: str) -> str:
69
- pipe = load_text_model()
70
- output = generate_text(pipe, prompt)
71
- return output
 
 
72
 
73
  @app.get("/logs")
74
  def get_logs():
 
1
  # main.py
2
+ from fastapi import FastAPI,status,Response,Request,Depends
3
  from fastapi.responses import StreamingResponse,FileResponse
4
  from models import load_text_model,generate_text,load_audio_model,generate_audio,load_image_model, generate_image
5
  from schemas import VoicePresets
 
10
  import time
11
  from datetime import datetime, timezone
12
  import csv
13
+ from dependencies import get_urls_content
14
+ from schemas import TextModelResponse,TextModelRequest
15
 
16
  models = {}
17
 
18
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncIterator[None]:
    """Load the text model before the app starts and drop all models on exit.

    The loaded model is stored in the module-level ``models`` dict under the
    ``"text"`` key; the dict is emptied once the context resumes after ``yield``.
    """
    # Image model loading is currently disabled:
    # models["text2image"] = load_image_model()
    models.update(text=load_text_model())
    yield
    models.clear()
24
 
 
62
 
63
 
64
  # app = FastAPI()
 
65
@app.get("/")
def root_controller():
    """Report that the service is up with a static status payload."""
    health = {"status": "healthy"}
    return health
68
 
69
@app.get("/generate/text")
async def serve_language_model_controller(
    request: Request,
    body: TextModelRequest,
    urls_content: str = Depends(get_urls_content),
) -> TextModelResponse:
    """Generate text for a prompt, enriched with the content of any linked pages.

    Args:
        request: Incoming request; used only to read the client address.
        body: Validated request carrying the prompt and sampling temperature.
        urls_content: Text scraped from URLs in the prompt, resolved by the
            ``get_urls_content`` dependency.

    Returns:
        A ``TextModelResponse`` with the generated text and the client IP
        (``None`` when the server does not expose a client address).
    """
    # NOTE(review): this is a GET route that takes a request body; POST is
    # presumably what was intended - confirm before changing the method, since
    # that would alter the public route.
    prompt = body.prompt + " " + urls_content
    output = generate_text(models["text"], prompt, body.temperature)
    # `request.client` is optional in Starlette, so guard against None
    # instead of raising AttributeError.
    client_ip = request.client.host if request.client else None
    # `tokens` is declared without a default on the response schema, so it must
    # be passed explicitly or validation fails on every request.
    return TextModelResponse(content=output, ip=client_ip, tokens=None)
76
 
77
  @app.get("/logs")
78
  def get_logs():
requirements.txt CHANGED
@@ -6,4 +6,7 @@ pydantic
6
  bitsandbytes
7
  soundfile
8
  diffusers
9
- loguru
 
 
 
 
6
  bitsandbytes
7
  soundfile
8
  diffusers
9
+ loguru
10
+ beautifulsoup4
11
+ lxml
12
+ aiohttp
schemas.py CHANGED
@@ -1,3 +1,59 @@
1
  from typing import Literal
 
 
 
 
 
2
 
3
  VoicePresets = Literal["v2/en_speaker_1", "v2/en_speaker_9"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Literal
2
+ from datetime import datetime
3
+ from typing import Annotated, Literal
4
+ from uuid import uuid4
5
+ from pydantic import BaseModel, Field, HttpUrl, IPvAnyAddress, PositiveInt,AfterValidator,validate_call
6
+
7
 
8
  VoicePresets = Literal["v2/en_speaker_1", "v2/en_speaker_9"]
9
+
10
class ModelRequest(BaseModel):
    """Base payload for every model request: a non-empty, bounded prompt."""

    # Required; between 1 and 10000 characters.
    prompt: str = Field(min_length=1, max_length=10000)
12
+
13
+
14
class ModelResponse(BaseModel):
    """Fields shared by every model response."""

    # Random hex identifier generated per response when not supplied.
    request_id: Annotated[str, Field(default_factory=lambda: uuid4().hex)]
    # Address of the caller; required, but may be explicitly None.
    ip: Annotated[str, IPvAnyAddress] | None
    # Generated content; required, may be None, at most 10000 characters.
    content: Annotated[str | None, Field(min_length=0, max_length=10000)]
    # A factory is required here: a plain `datetime.now()` default is evaluated
    # once at class-definition time and would stamp every response with the
    # moment the module was imported.
    created_at: datetime = Field(default_factory=datetime.now)
19
+
20
+
21
class TextModelRequest(ModelRequest):
    """Text-generation request: prompt plus model choice and temperature."""

    # Only these two model identifiers are accepted.
    model: Literal["gpt-3.5-turbo", "gpt-4o"]
    # Optional; constrained to the closed interval [0.0, 1.0].
    temperature: Annotated[float, Field(ge=0.0, le=1.0)] = 0.0
24
+
25
class TextModelResponse(ModelResponse):
    """Text-generation response with an optional token count."""

    # Defaults to None so a response can be built before any token accounting
    # exists; without the default the field is required and constructing the
    # response without `tokens` fails validation.
    tokens: Annotated[int | None, Field(ge=0)] = None
27
+
28
# (width, height) pair of strictly positive integers.
ImageSize = Annotated[
    tuple[PositiveInt, PositiveInt],
    "Width and height of an image in pixels",
]

# Identifiers of the image-generation models the schemas accept.
SupportedModels = Annotated[Literal["tinysd", "sd1.5"], "Supported Image Generation Models"]
32
+
33
@validate_call
def is_square_image(value: ImageSize) -> ImageSize:
    """Validate that an image size is square and one of the supported sizes.

    Args:
        value: ``(width, height)`` in pixels.

    Returns:
        ``value`` unchanged when it is 512x512 or 1024x1024.

    Raises:
        ValueError: If width and height differ, or the side length is neither
            512 nor 1024.
    """
    width, height = value
    # Compare the integers directly; a float ratio test loses precision (and
    # can overflow) for very large values.
    if width != height:
        raise ValueError("Only square images are supported")
    if width not in (512, 1024):
        raise ValueError(f"Invalid output size: {value} - expected 512 or 1024")
    return value
40
+
41
@validate_call
def is_valid_inference_step(
    num_inference_steps: int, model: SupportedModels
) -> int:
    """Check the inference-step count against the limit of the chosen model.

    Args:
        num_inference_steps: Requested number of inference steps.
        model: Name of the image-generation model.

    Returns:
        ``num_inference_steps`` unchanged when it is acceptable.

    Raises:
        ValueError: If ``model`` is ``"tinysd"`` and more than 2000 steps are
            requested.
    """
    exceeds_tinysd_limit = model == "tinysd" and num_inference_steps > 2000
    if exceeds_tinysd_limit:
        raise ValueError("TinySD model cannot have more than 2000 inference steps")
    return num_inference_steps
50
+
51
class ImageModelRequest(ModelRequest):
    """Image-generation request: prompt plus model, output size and step count."""

    model: SupportedModels
    # NOTE(review): `is_square_image` and `is_valid_inference_step` are defined
    # in this module but are not attached to these fields - confirm whether
    # they were meant to be wired in as validators.
    output_size: ImageSize
    # Optional; between 0 and 2000 inclusive.
    num_inference_steps: int = Field(default=200, ge=0, le=2000)
55
+
56
class ImageModelResponse(ModelResponse):
    """Image-generation response: shared response fields plus size and URL."""

    # Size of the generated image; required.
    size: ImageSize
    # Optional location of the generated image; absent by default.
    url: Annotated[str, HttpUrl] | None = None
59
+
scrapper.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import re
3
+
4
+ import aiohttp
5
+ from bs4 import BeautifulSoup
6
+ from loguru import logger
7
+
8
# An http(s) scheme followed by every non-whitespace character after it.
_URL_PATTERN = re.compile(r"https?://\S+")
# Characters that usually belong to the surrounding sentence, not to the URL.
_TRAILING_PUNCTUATION = ".,;:!?\"'"


def extract_urls(text: str) -> list[str]:
    """Return the distinct http(s) URLs found in ``text``, in order of appearance.

    Trailing sentence punctuation (for example the full stop in
    ``"see https://example.com."``) is stripped so the URL stays fetchable, and
    repeated URLs are reported once so the same page is not downloaded twice.

    Args:
        text: Free-form text that may contain URLs.

    Returns:
        A list of URL strings; empty when ``text`` contains none.
    """
    candidates = (
        match.rstrip(_TRAILING_PUNCTUATION) for match in _URL_PATTERN.findall(text)
    )
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(candidates))
12
+
13
+
14
def parse_inner_text(html_string: str) -> str:
    """Extract the text of the ``<div id="bodyContent">`` element from HTML.

    Args:
        html_string: Raw HTML document.

    Returns:
        The text of the ``bodyContent`` div, or an empty string (after logging
        a warning) when the document has no such element.
    """
    # NOTE(review): only pages exposing a `bodyContent` div yield any text;
    # confirm whether other page layouts are meant to be supported.
    body_content = BeautifulSoup(html_string, "lxml").find("div", id="bodyContent")
    if not body_content:
        logger.warning("Could not parse the HTML content")
        return ""
    return body_content.get_text()
20
+
21
+
22
async def fetch(session: aiohttp.ClientSession, url: str) -> str:
    """Download ``url`` with ``session`` and return its parsed inner text.

    Args:
        session: Open aiohttp client session used to issue the GET request.
        url: Address of the page to download.

    Returns:
        The result of ``parse_inner_text`` applied to the response body.
    """
    async with session.get(url) as response:
        return parse_inner_text(await response.text())
26
+
27
+
28
async def fetch_all(urls: list[str]) -> str:
    """Fetch all ``urls`` concurrently and join the pages that succeeded.

    Args:
        urls: Addresses to download.

    Returns:
        The text of every successfully fetched page, separated by single
        spaces. Failed fetches are skipped; a warning is logged if any failed.
    """
    async with aiohttp.ClientSession() as session:
        pending = [fetch(session, url) for url in urls]
        # return_exceptions=True turns failures into result values, so one bad
        # URL does not cancel the others.
        outcomes = await asyncio.gather(*pending, return_exceptions=True)

    pages = []
    any_failed = False
    for outcome in outcomes:
        if isinstance(outcome, str):
            pages.append(outcome)
        else:
            any_failed = True

    if any_failed:
        logger.warning("Some URLs could not be fetched")
    return " ".join(pages)
utils.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
4
  from PIL import Image
5
  from typing import Literal,TypeAlias
6
  import tiktoken
 
7
 
8
  def audio_array_to_buffer(audio_array: np.array, sample_rate: int) -> BytesIO:
9
  buffer = BytesIO()
@@ -42,7 +43,7 @@ def calculate_usage_costs(
42
  if model not in price_table:
43
  # raise at runtime - in case someone ignores type errors
44
  raise ValueError(f"Cost calculation is not supported for {model} model.")
45
- price = price_table[model] 9
46
  req_costs = price * count_tokens(prompt) / 1000
47
  res_costs = price * count_tokens(response) / 1000
48
  total_costs = req_costs + res_costs
 
4
  from PIL import Image
5
  from typing import Literal,TypeAlias
6
  import tiktoken
7
+ from loguru import logger
8
 
9
  def audio_array_to_buffer(audio_array: np.array, sample_rate: int) -> BytesIO:
10
  buffer = BytesIO()
 
43
  if model not in price_table:
44
  # raise at runtime - in case someone ignores type errors
45
  raise ValueError(f"Cost calculation is not supported for {model} model.")
46
+ price = price_table[model]
47
  req_costs = price * count_tokens(prompt) / 1000
48
  res_costs = price * count_tokens(response) / 1000
49
  total_costs = req_costs + res_costs