Spaces:

habulaj
/

newapi-clone

Running

App Files Files Community

habulaj committed 2 days ago

Commit

47a6536

verified ·

1 Parent(s): ca15e7a

Update routers/inference.py

Browse files

Files changed (1) hide show

routers/inference.py +119 -9

routers/inference.py CHANGED Viewed

@@ -10,6 +10,7 @@ from datetime import datetime
 from zoneinfo import ZoneInfo
 import locale
 import re
 # Configurar logging
 logger = logging.getLogger(__name__)
@@ -105,15 +106,118 @@ def get_brazilian_date_string():
 def download_sources_file(url: str) -> str:
     """
-    Baixa o arquivo fontes.txt da URL fornecida.
     """
-    try:
-        response = requests.get(url, timeout=30)
-        response.raise_for_status()
-        return response.text
-    except Exception as e:
-        logger.error(f"Erro ao baixar arquivo de fontes: {e}")
-        raise HTTPException(status_code=400, detail=f"Erro ao baixar arquivo de fontes: {str(e)}")
 def extract_text_from_response(response):
     """
@@ -187,8 +291,10 @@ async def rewrite_news(news: NewsRequest):
         if not api_key:
             raise HTTPException(status_code=500, detail="API key não configurada")
-        # Baixar arquivo de fontes
         sources_content = download_sources_file(news.sources_url)
         client = genai.Client(api_key=api_key)
         model = "gemini-2.5-pro"
@@ -287,6 +393,7 @@ News base: Ed Helms revealed in an interview that he was nervous about his paren
         ]
         # Gerar conteúdo
         response = client.models.generate_content(
             model=model,
             contents=contents,
@@ -296,6 +403,8 @@ News base: Ed Helms revealed in an interview that he was nervous about his paren
         # Extrair texto e fontes
         response_text = extract_text_from_response(response)
         sources = extract_sources_from_response(response)
         # Verificar se o texto está vazio
         if not response_text or response_text.strip() == "":
@@ -321,6 +430,7 @@ News base: Ed Helms revealed in an interview that he was nervous about his paren
             else:
                 content = "Conteúdo não encontrado"
         return NewsResponse(title=title, subhead=subhead, content=content, sources=sources)
     except HTTPException:

 from zoneinfo import ZoneInfo
 import locale
 import re
+import time
 # Configurar logging
 logger = logging.getLogger(__name__)
 def download_sources_file(url: str) -> str:
     """
     Download the sources file (fontes.txt) from ``url`` with retries.

     Up to three attempts are made. Each attempt uses a longer timeout
     (45s, 60s, 75s) and, between attempts, the function sleeps 2s and then
     4s. Inside every attempt the HTTP adapter additionally retries
     429/500/502/503/504 responses up to two times on its own.

     Args:
         url: Address of the sources file.

     Returns:
         The response body decoded as text. The body is probed with
         ``json.loads`` but a non-JSON body is only logged, not rejected.

     Raises:
         HTTPException: 408 when every attempt timed out; 503 when every
             attempt failed to connect; 404 immediately when the file does
             not exist; the upstream 500/502/503/504 status when it persists
             through every attempt; any other HTTP error status immediately;
             422 when the stripped body is shorter than 10 characters;
             500 when an unexpected error persists through every attempt.
     """
     # Kept as function-local imports (as in the original) so the block does
     # not depend on module-level imports that are not visible here; hoisted
     # out of the retry loop because they are loop-invariant.
     from requests.adapters import HTTPAdapter
     from urllib3.util.retry import Retry
     max_retries = 3
     base_timeout = 45
     # Headers that mimic a real browser
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
         'Accept': 'text/plain,text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
         'Accept-Language': 'pt-BR,pt;q=0.9,en;q=0.8',
         'Accept-Encoding': 'gzip, deflate, br',
         'Connection': 'keep-alive',
         'Upgrade-Insecure-Requests': '1',
         'Sec-Fetch-Dest': 'document',
         'Sec-Fetch-Mode': 'navigate',
         'Sec-Fetch-Site': 'none',
         'Cache-Control': 'max-age=0'
     }
     # Adapter-level automatic retry for transient upstream statuses
     retry_strategy = Retry(
         total=2,
         backoff_factor=1,
         status_forcelist=[429, 500, 502, 503, 504],
     )
     # One session for all attempts; the context manager closes its
     # connection pool on every exit path (return or raise).
     with requests.Session() as session:
         adapter = HTTPAdapter(max_retries=retry_strategy)
         session.mount("http://", adapter)
         session.mount("https://", adapter)
         for attempt in range(max_retries):
             try:
                 logger.info(f"Tentativa {attempt + 1} de download do arquivo: {url}")
                 # Progressive timeout: 45s, 60s, 75s
                 timeout = base_timeout + (attempt * 15)
                 response = session.get(
                     url,
                     headers=headers,
                     timeout=timeout,
                     allow_redirects=True,
                     stream=False  # No streaming needed for small files
                 )
                 response.raise_for_status()
                 content = response.text
                 logger.info(f"Download bem-sucedido na tentativa {attempt + 1}. Tamanho: {len(content)} caracteres")
                 # Basic content validation
                 if len(content.strip()) < 10:
                     raise ValueError("Conteúdo do arquivo muito pequeno ou vazio")
                 # Probe whether the body is valid JSON; this is informational only
                 try:
                     json.loads(content)
                     logger.info("Arquivo JSON válido confirmado")
                 except json.JSONDecodeError:
                     logger.warning("Arquivo não é um JSON válido, mas continuando...")
                 return content
             except requests.exceptions.Timeout as e:
                 logger.warning(f"Timeout na tentativa {attempt + 1}: {e}")
                 if attempt == max_retries - 1:
                     raise HTTPException(
                         status_code=408,
                         detail=f"Timeout ao baixar arquivo após {max_retries} tentativas. O servidor pode estar sobrecarregado."
                     ) from e
             except requests.exceptions.ConnectionError as e:
                 logger.warning(f"Erro de conexão na tentativa {attempt + 1}: {e}")
                 if attempt == max_retries - 1:
                     raise HTTPException(
                         status_code=503,
                         detail=f"Erro de conexão após {max_retries} tentativas. Verifique se a URL está correta: {url}"
                     ) from e
             except requests.exceptions.HTTPError as e:
                 # A Response is falsy whenever its status is 4xx/5xx, so it must
                 # be compared against None; a truthiness test would always
                 # fall back to 500 here and hide the real status.
                 status_code = e.response.status_code if e.response is not None else 500
                 logger.error(f"Erro HTTP {status_code} na tentativa {attempt + 1}: {e}")
                 if status_code == 404:
                     raise HTTPException(status_code=404, detail="Arquivo não encontrado. Verifique se a URL está correta.") from e
                 elif status_code in [500, 502, 503, 504]:
                     # Transient server errors are retried until the last attempt
                     if attempt == max_retries - 1:
                         raise HTTPException(status_code=status_code, detail=f"Erro do servidor ({status_code}) após {max_retries} tentativas.") from e
                 else:
                     raise HTTPException(status_code=status_code, detail=f"Erro HTTP {status_code}: {str(e)}") from e
             except ValueError as e:
                 # Invalid content will not improve on retry, so fail immediately
                 logger.error(f"Erro de validação na tentativa {attempt + 1}: {e}")
                 raise HTTPException(status_code=422, detail=f"Conteúdo do arquivo inválido: {str(e)}") from e
             except Exception as e:
                 logger.error(f"Erro inesperado na tentativa {attempt + 1}: {e}")
                 if attempt == max_retries - 1:
                     raise HTTPException(status_code=500, detail=f"Erro inesperado ao baixar arquivo: {str(e)}") from e
             # Wait before the next attempt (linear backoff: 2s, then 4s)
             if attempt < max_retries - 1:
                 wait_time = (attempt + 1) * 2
                 logger.info(f"Aguardando {wait_time}s antes da próxima tentativa...")
                 time.sleep(wait_time)
 def extract_text_from_response(response):
     """
         if not api_key:
             raise HTTPException(status_code=500, detail="API key não configurada")
+        # Baixar arquivo de fontes com retry melhorado
+        logger.info(f"Iniciando download do arquivo de fontes: {news.sources_url}")
         sources_content = download_sources_file(news.sources_url)
+        logger.info("Download do arquivo de fontes concluído com sucesso")
         client = genai.Client(api_key=api_key)
         model = "gemini-2.5-pro"
         ]
         # Gerar conteúdo
+        logger.info("Iniciando geração de conteúdo com Gemini...")
         response = client.models.generate_content(
             model=model,
             contents=contents,
         # Extrair texto e fontes
         response_text = extract_text_from_response(response)
         sources = extract_sources_from_response(response)
+        logger.info("Conteúdo gerado com sucesso pelo Gemini")
         # Verificar se o texto está vazio
         if not response_text or response_text.strip() == "":
             else:
                 content = "Conteúdo não encontrado"
+        logger.info("Processamento concluído com sucesso")
         return NewsResponse(title=title, subhead=subhead, content=content, sources=sources)
     except HTTPException: