Spaces:

ginigen
/

Sign-language

Building

App Files Community

ginipick committed on Jan 26

Commit

c082dde

verified ·

1 Parent(s): aa40f07

Update src/main.py

Browse files

Files changed (1) hide show

src/main.py +66 -62

src/main.py CHANGED Viewed

@@ -21,37 +21,33 @@ def clean_quotes(text):
     """따옴표 정리 함수"""
     # 연속된 따옴표 제거
     text = re.sub(r"'+", "'", text)
-    # 단어 중간의 따옴표 제거
-    text = re.sub(r"(\w)'(\w)", r"\1\2", text)
     return text
 def normalize_quotes(text):
     """Normalize the quote formatting of the text."""
     # First clean up all quotes
     text = clean_quotes(text)
-    # Look for Korean or English words to process
-    pattern = r'([가-힣A-Za-z]+)'
-    def process_match(match):
-        word = match.group(1)
-        # Do not process a word that is already wrapped in quotes
-        if not re.match(r"'.*'", word):
-            return f"'{word}'"
-        return word
-    # Process word by word (note: process_match above is never called in this function)
-    words = text.split()
-    processed_words = []
-    for word in words:
-        if re.search(pattern, word):
-            # Leave words that already carry a quote untouched; add quotes only when there are none
-            if not word.startswith("'") and not word.endswith("'"):
-                word = f"'{word}'"
-        processed_words.append(word)
-    return ' '.join(processed_words)
 def find_quoted_words(text):
     """작은따옴표로 묶인 단어들을 찾는 함수"""
@@ -63,8 +59,13 @@ def spell_out_word(word):
 def is_english(text):
     """Check whether the text is English (ASCII letters, whitespace and basic punctuation only)."""
-    english_pattern = re.compile(r'^[A-Za-z\s\'".,!?-]+$')
-    return bool(english_pattern.match(text.replace("'", "")))
 def translate_korean_to_english(text):
     """전체 텍스트 번역 함수"""
@@ -74,55 +75,57 @@ def translate_korean_to_english(text):
         # 영어 입력 확인
         if is_english(text):
             return text
-        # 따옴표로 묶인 단어들 찾기
         quoted_words = re.findall(r"'([^']*)'", text)
-        translated_quoted = {}
-        # 따옴표 안의 단어들 먼저 번역
-        for word in quoted_words:
-            if not word.strip():  # 빈 문자열 건너뛰기
-                continue
-            url = "https://translate.googleapis.com/translate_a/single"
-            params = {
-                "client": "gtx",
-                "sl": "ko",
-                "tl": "en",
-                "dt": "t",
-                "q": word
-            }
-            response = requests.get(url, params=params)
-            if response.status_code == 200:
-                translated = response.json()[0][0][0].upper()
-                translated_quoted[word] = translated
-                # 임시 마커로 대체
-                text = text.replace(f"'{word}'", f"QUOTED_{len(translated_quoted)}_")
-        # 전체 문장 번역
         params = {
             "client": "gtx",
             "sl": "ko",
             "tl": "en",
             "dt": "t",
-            "q": text
         }
         response = requests.get(url, params=params)
         if response.status_code == 200:
-            translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
-            # 번역된 텍스트에서 마커를 번역된 단어로 대체
-            for i, (original, translated) in enumerate(translated_quoted.items(), 1):
-                translated_text = translated_text.replace(f"QUOTED_{i}_", f"'{translated}'")
-            return translated_text
-        else:
-            raise Exception(f"Translation API returned status code: {response.status_code}")
     except Exception as e:
         print(f"Translation error: {e}")
         return text
 @app.route('/')
 def index():  # Handles the root URL
     return render_template('index.html', title=app.config['TITLE'])  # Renders index.html with the configured TITLE
@@ -143,8 +146,9 @@ def result():
             if not english_text:
                 raise Exception("Translation failed")
-            # 따옴표로 묶인 단어 추출
-            quoted_words = [word.strip("'") for word in re.findall(r"'([^']*)'", english_text)]
             # ASL 변환을 위해 따옴표 제거
             clean_english = re.sub(r"'([^']*)'", r"\1", english_text)
@@ -157,7 +161,7 @@ def result():
             for word in words:
                 word_upper = word.upper()
-                if any(quoted.upper() == word_upper for quoted in quoted_words):
                     # 고유명사인 경우 철자를 하나씩 분리
                     spelled_word = spell_out_word(word)
                     processed_gloss.extend(['FINGERSPELL-START'] + spelled_word.split() + ['FINGERSPELL-END'])

     """따옴표 정리 함수"""
     # 연속된 따옴표 제거
     text = re.sub(r"'+", "'", text)
+    # 불필요한 공백 제거
+    text = re.sub(r'\s+', ' ', text).strip()
     return text
+def is_korean(text):
+    """Return True if the text contains at least one Hangul syllable in the range 가-힣."""
+    return bool(re.search('[가-힣]', text))
 def normalize_quotes(text):
     """Normalize the quote formatting of the text."""
     # First clean up all quotes
     text = clean_quotes(text)
+    if is_korean(text):
+        # Korean sentence: quote only the first noun phrase (in practice, the first whitespace-separated token)
+        words = text.split()
+        first_word = words[0]
+        if not (first_word.startswith("'") and first_word.endswith("'")):
+            words[0] = f"'{first_word}'"
+        return ' '.join(words)
+    else:
+        # English sentence: quote only the first word
+        words = text.split()
+        if words:
+            if not (words[0].startswith("'") and words[0].endswith("'")):
+                words[0] = f"'{words[0]}'"
+        return ' '.join(words)
 def find_quoted_words(text):
     """작은따옴표로 묶인 단어들을 찾는 함수"""
 def is_english(text):
     """Check whether the text is English."""
+    # Strip quotes and basic punctuation, keeping only English letters and whitespace
+    cleaned_text = re.sub(r'[^A-Za-z\s]', '', text)
+    # Check that at least one letter is present
+    has_letters = bool(re.search('[A-Za-z]', cleaned_text))
+    # Check that nothing but letters and whitespace remains. NOTE(review): cleaned_text has already had every other character (Hangul included) removed, so this always matches and mixed Korean/English input is reported as English
+    is_only_english = bool(re.match(r'^[A-Za-z\s]*$', cleaned_text))
+    return has_letters and is_only_english
 def translate_korean_to_english(text):
     """전체 텍스트 번역 함수"""
         # 영어 입력 확인
         if is_english(text):
+            # 영어 입력의 경우 첫 단어만 따옴표로 처리
+            words = text.split()
+            if words:
+                # 이미 따옴표가 있는 경우는 그대로, 없는 경우 추가
+                if not (words[0].startswith("'") and words[0].endswith("'")):
+                    words[0] = f"'{words[0]}'"
+                return ' '.join(words)
             return text
+        # 한글 입력 처리
+        # 따옴표로 묶인 단어 찾기
         quoted_words = re.findall(r"'([^']*)'", text)
+        if not quoted_words:
+            # 따옴표로 묶인 단어가 없는 경우, 첫 단어를 따옴표로 묶기
+            words = text.split()
+            if words:
+                text = f"'{words[0]}'" + text[len(words[0]):]
+                quoted_words = [words[0]]
+        # 첫 번째 따옴표 단어 번역
+        url = "https://translate.googleapis.com/translate_a/single"
         params = {
             "client": "gtx",
             "sl": "ko",
             "tl": "en",
             "dt": "t",
+            "q": quoted_words[0]
         }
         response = requests.get(url, params=params)
         if response.status_code == 200:
+            translated_word = response.json()[0][0][0].upper()
+            # 임시 마커로 대체
+            text = text.replace(f"'{quoted_words[0]}'", "QUOTED_WORD_MARKER")
+            # 전체 문장 번역
+            params["q"] = text
+            response = requests.get(url, params=params)
+            if response.status_code == 200:
+                translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
+                # 마커를 번역된 단어로 대체
+                translated_text = translated_text.replace("QUOTED_WORD_MARKER", f"'{translated_word}'")
+                return translated_text
+        return text
     except Exception as e:
         print(f"Translation error: {e}")
         return text
 @app.route('/')
 def index():  # Handles the root URL
     return render_template('index.html', title=app.config['TITLE'])  # Renders index.html with the configured TITLE
             if not english_text:
                 raise Exception("Translation failed")
+            # 따옴표로 묶인 단어 추출 (첫 번째 단어만)
+            quoted_words = re.findall(r"'([^']*)'", english_text)
+            first_quoted_word = quoted_words[0] if quoted_words else None
             # ASL 변환을 위해 따옴표 제거
             clean_english = re.sub(r"'([^']*)'", r"\1", english_text)
             for word in words:
                 word_upper = word.upper()
+                if first_quoted_word and word_upper == first_quoted_word.upper():
                     # 고유명사인 경우 철자를 하나씩 분리
                     spelled_word = spell_out_word(word)
                     processed_gloss.extend(['FINGERSPELL-START'] + spelled_word.split() + ['FINGERSPELL-END'])