Update summarizer/summarizer.py
Browse files- summarizer/summarizer.py +26 -12
summarizer/summarizer.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
import os
|
2 |
import requests
|
3 |
-
from summarizer.translator import TextTranslator
|
4 |
-
|
5 |
class Summarizer:
|
6 |
def __init__(self):
|
7 |
-
self.
|
|
|
8 |
self.api_token = os.getenv("API_KEY")
|
9 |
if not self.api_token:
|
10 |
raise ValueError("Hugging Face API Key not found in environment variables.")
|
@@ -12,7 +11,6 @@ class Summarizer:
|
|
12 |
"Authorization": f"Bearer {self.api_token}",
|
13 |
"Content-Type": "application/json"
|
14 |
}
|
15 |
-
self.translator = TextTranslator()
|
16 |
def clean_text(self, text: str) -> str:
    """Collapse every whitespace run, newlines included, into one space."""
    # str.split() with no separator already treats '\n' as whitespace and
    # discards leading/trailing blanks, so the words can be re-joined directly.
    words = text.split()
    return " ".join(words)
|
@@ -27,33 +25,49 @@ class Summarizer:
|
|
27 |
if "antibiotic" in summary.lower() and "supportive" in summary.lower() and "treatment" not in summary.lower():
|
28 |
summary += " Treatment includes antibiotics and supportive care."
|
29 |
return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
def summarize_text(self, text: str, target_lang: str = 'en') -> str:
|
31 |
"""Detect language, translate if needed, summarize, format, and retranslate if needed."""
|
32 |
try:
|
33 |
-
# Detect and translate input if not in English
|
34 |
-
detected_lang = self.translator.detect_language(text)
|
35 |
-
if detected_lang != 'en':
|
36 |
-
text = self.translator.translate_to_english(text)
|
37 |
cleaned_text = self.clean_text(text)
|
|
|
|
|
|
|
|
|
38 |
payload = {
|
39 |
"inputs": f"summarize the clinical case with diagnosis, comorbidities, and treatment plan: {cleaned_text}"
|
40 |
}
|
41 |
-
|
|
|
42 |
response.raise_for_status()
|
|
|
43 |
response_data = response.json()
|
44 |
-
|
45 |
if isinstance(response_data, list) and "generated_text" in response_data[0]:
|
46 |
summary = response_data[0]["generated_text"]
|
47 |
else:
|
48 |
return "Unexpected response format from Hugging Face API."
|
|
|
49 |
formatted_summary = self.format_summary(summary)
|
|
|
50 |
# Translate back to target language if needed
|
51 |
if target_lang != 'en':
|
52 |
-
formatted_summary = self.
|
|
|
53 |
return formatted_summary
|
|
|
54 |
except requests.exceptions.Timeout:
|
55 |
return "Summarization request timed out."
|
56 |
except requests.exceptions.RequestException as e:
|
57 |
return f"Summarization request failed: {str(e)}"
|
58 |
except Exception as e:
|
59 |
-
return f"An error occurred during summarization: {str(e)}"
|
|
|
1 |
import os
|
2 |
import requests
|
|
|
|
|
3 |
class Summarizer:
|
4 |
def __init__(self):
|
5 |
+
self.summarizer_api_url = "https://api-inference.huggingface.co/models/Aadityaramrame/carecompanion-summarizer"
|
6 |
+
self.translation_api_url = "https://carecompanion-summarizer.onrender.com/translate" # Replace with actual Render translation API URL
|
7 |
self.api_token = os.getenv("API_KEY")
|
8 |
if not self.api_token:
|
9 |
raise ValueError("Hugging Face API Key not found in environment variables.")
|
|
|
11 |
"Authorization": f"Bearer {self.api_token}",
|
12 |
"Content-Type": "application/json"
|
13 |
}
|
|
|
14 |
def clean_text(self, text: str) -> str:
    """Collapse every whitespace run, newlines included, into one space."""
    # str.split() with no separator already treats '\n' as whitespace and
    # discards leading/trailing blanks, so the words can be re-joined directly.
    words = text.split()
    return " ".join(words)
|
|
|
25 |
if "antibiotic" in summary.lower() and "supportive" in summary.lower() and "treatment" not in summary.lower():
|
26 |
summary += " Treatment includes antibiotics and supportive care."
|
27 |
return summary
|
28 |
+
def translate_text(self, text: str, target_lang: str) -> str:
    """Translate ``text`` into ``target_lang`` via the translation endpoint.

    Posts ``{"text": ..., "target_lang": ...}`` as JSON to
    ``self.translation_api_url`` using ``self.headers``.

    Args:
        text: Text to translate.
        target_lang: Language code requested from the service (e.g. "en").

    Returns:
        The ``translated_text`` field of the JSON response. If the response
        is not a JSON object or lacks that field, ``text`` is returned
        unchanged. On a request or JSON-decoding failure, a string starting
        with ``"Translation failed: "`` is returned instead of raising.
    """
    # NOTE(review): self.headers appears to carry the Hugging Face bearer
    # token, so it is also sent to the translation host -- confirm that the
    # translation service really needs (and should receive) that credential.
    try:
        payload = {"text": text, "target_lang": target_lang}
        response = requests.post(
            self.translation_api_url,
            headers=self.headers,
            json=payload,
            # requests waits indefinitely unless a timeout is given; use the
            # same limit as the summarization request.
            timeout=30,
        )
        response.raise_for_status()
        response_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body on requests versions
        # whose JSON error does not derive from RequestException.
        return f"Translation failed: {str(e)}"

    # A JSON array/scalar has no .get(); fall back to the untranslated text,
    # exactly as for an object that lacks "translated_text".
    if not isinstance(response_data, dict):
        return text
    return response_data.get("translated_text", text)
|
38 |
def summarize_text(self, text: str, target_lang: str = 'en') -> str:
    """Summarize a clinical text, translating around the model when needed.

    Steps: clean the text; when ``target_lang`` is not ``'en'`` translate the
    input to English; call the summarization endpoint; pass the result
    through ``format_summary``; and, again when ``target_lang`` is not
    ``'en'``, translate the formatted summary into ``target_lang``.

    No language detection happens here: a non-English ``target_lang`` is
    taken to mean that the input is not in English either.

    Args:
        text: Raw clinical text.
        target_lang: Language code for the returned summary.

    Returns:
        The formatted summary, or a human-readable error message. Failures
        are reported as returned strings rather than raised.
    """
    try:
        cleaned_text = self.clean_text(text)

        # Translate the input to English before summarizing.
        if target_lang != 'en':
            cleaned_text = self.translate_text(cleaned_text, "en")
            # translate_text reports failure as a "Translation failed: ..."
            # string; stop here instead of summarizing an error message.
            if isinstance(cleaned_text, str) and cleaned_text.startswith("Translation failed:"):
                return cleaned_text

        # Summarize using Hugging Face API
        payload = {
            "inputs": f"summarize the clinical case with diagnosis, comorbidities, and treatment plan: {cleaned_text}"
        }
        response = requests.post(self.summarizer_api_url, headers=self.headers, json=payload, timeout=30)
        response.raise_for_status()
        response_data = response.json()

        # Expected shape: a non-empty list whose first item is a dict with
        # "generated_text". Anything else (empty list, error object, list of
        # non-dicts) is reported as an unexpected format.
        first_item = response_data[0] if isinstance(response_data, list) and response_data else None
        if not isinstance(first_item, dict) or "generated_text" not in first_item:
            return "Unexpected response format from Hugging Face API."
        summary = first_item["generated_text"]

        formatted_summary = self.format_summary(summary)

        # Translate back to target language if needed
        if target_lang != 'en':
            formatted_summary = self.translate_text(formatted_summary, target_lang)

        return formatted_summary

    except requests.exceptions.Timeout:
        return "Summarization request timed out."
    except requests.exceptions.RequestException as e:
        return f"Summarization request failed: {str(e)}"
    except Exception as e:
        # Last-resort boundary: callers receive a message, never an exception.
        return f"An error occurred during summarization: {str(e)}"
|