Update app.py
app.py CHANGED
@@ -14,6 +14,7 @@ from duckduckgo_search import DDGS
 MODEL_NAME = "lilmeaty/my_xdd"
 global_model = None
 global_tokenizer = None
+global_tokens = {}
 
 async def cleanup_memory(device: str):
     gc.collect()
@@ -41,7 +42,7 @@ app = FastAPI()
 
 @app.on_event("startup")
 async def load_global_model():
-    global global_model, global_tokenizer
+    global global_model, global_tokenizer, global_tokens
     config = AutoConfig.from_pretrained(MODEL_NAME)
     global_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, config=config)
     global_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, config=config, torch_dtype=torch.float16)
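A side note for readers of this hunk: @app.on_event("startup") is deprecated in recent FastAPI releases in favor of a lifespan context manager. A minimal sketch of the same load-once pattern written that way follows; the direct call to load_global_model is illustrative and assumes the function is usable without the decorator, none of this is part of the commit:

from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Runs once before the app starts serving: load the model,
    # as load_global_model does in this commit.
    await load_global_model()
    yield  # requests are served here; teardown would go after the yield

app = FastAPI(lifespan=lifespan)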
@@ -49,6 +50,8 @@ async def load_global_model():
     global_tokenizer.pad_token_id = config.pad_token_id or global_tokenizer.eos_token_id
     device = "cuda" if torch.cuda.is_available() else "cpu"
     global_model.to(device)
+    global_tokens["eos_token_id"] = global_tokenizer.eos_token_id
+    global_tokens["pad_token_id"] = global_tokenizer.pad_token_id
     print(f"Modelo {MODEL_NAME} cargado correctamente en {device}.")
 
 async def perform_duckasgo_search(query: str, max_results: int = 3) -> str:
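The hunk above resolves the special-token ids once at startup and stashes them in the module-level global_tokens dict, so the per-token streaming loop further down can do a plain dict lookup instead of reaching through the tokenizer on every step. A minimal sketch of that caching pattern; the cache_special_tokens helper is hypothetical, not part of the commit:

global_tokens = {}

def cache_special_tokens(tokenizer):
    # Resolve the ids once at startup; fall back to EOS when PAD is unset,
    # mirroring the pad_token_id assignment in the hunk above.
    global_tokens["eos_token_id"] = tokenizer.eos_token_id
    global_tokens["pad_token_id"] = tokenizer.pad_token_id or tokenizer.eos_token_id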
@@ -68,7 +71,7 @@ async def perform_duckasgo_search(query: str, max_results: int = 3) -> str:
     return summary
 
 async def stream_text(request: GenerateRequest, device: str):
-    global global_model, global_tokenizer
+    global global_model, global_tokenizer, global_tokens
     encoded_input = global_tokenizer(request.input_text, return_tensors="pt").to(device)
     input_ids = encoded_input.input_ids
     accumulated_text = ""
@@ -119,7 +122,7 @@ async def stream_text(request: GenerateRequest, device: str):
         chunk_token_count = 0
         await asyncio.sleep(0)
         input_ids = next_token
-        if token_id ==
+        if token_id == global_tokens["eos_token_id"]:
             break
     if current_chunk:
         yield current_chunk
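For context, the EOS comparison this hunk rewires (the old side is truncated in this view) typically sits at the bottom of a token-by-token generation loop. A self-contained sketch of such a loop, assuming greedy decoding; the actual sampling logic of stream_text is not shown in this diff:

import torch

def greedy_stream(model, input_ids, eos_token_id, max_new_tokens=64):
    # Emit one token id per step and stop once the model produces EOS,
    # the same check the hunk performs via global_tokens["eos_token_id"].
    for _ in range(max_new_tokens):
        with torch.no_grad():
            logits = model(input_ids).logits
        next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        token_id = next_token.item()
        yield token_id
        input_ids = torch.cat([input_ids, next_token], dim=-1)
        if token_id == eos_token_id:
            break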
@@ -130,7 +133,7 @@ async def stream_text(request: GenerateRequest, device: str):
 
 @app.post("/generate")
 async def generate_text(request: GenerateRequest):
-    global global_model, global_tokenizer
+    global global_model, global_tokenizer, global_tokens
     if global_model is None or global_tokenizer is None:
         raise HTTPException(status_code=500, detail="El modelo no se ha cargado correctamente.")
     device = "cuda" if torch.cuda.is_available() else "cpu"
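A quick way to exercise the endpoint after this change. Only the input_text field of GenerateRequest is visible in this diff, so any other fields are omitted here, and the host and port are assumptions (uvicorn defaults):

import requests

resp = requests.post(
    "http://localhost:8000/generate",  # assumed local dev server
    json={"input_text": "Write a haiku about the sea."},
)
print(resp.status_code)
print(resp.text)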