openfree committed on
Commit
c0cc8cf
·
verified ·
1 Parent(s): f3d6c8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -36
app.py CHANGED
@@ -2872,49 +2872,45 @@ if __name__ == "__main__":
2872
  return value.strip() if value else None
2873
  return None
2874
 
 
 
2875
  def _parse_character_profile(self, content: str, role: str) -> CharacterProfile:
2876
- """Parse character profile from content"""
2877
- # Debug logging
2878
  logger.debug(f"Parsing character profile for role: {role}")
2879
  logger.debug(f"Content preview: {content[:200]}...")
2880
-
2881
- # Extract name first - handle various formats
2882
- name = f"Character_{role}" # default
2883
  name_patterns = [
2884
- r'(?:이름|Name)[:\s]*([^,\n]+?)(?:\s*\([^)]+\))?\s*',
2885
- r'^\s*[-*•]\s*([^,\n]+?)(?:\s*\([^)]+\))?\s*',
2886
- r'^([^,\n]+?)(?:\s*\([^)]+\))?\s*'
2887
  ]
2888
-
2889
- for pattern in name_patterns:
2890
- name_match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE)
2891
- if name_match and name_match.group(1):
2892
- extracted_name = name_match.group(1).strip()
2893
- # Remove markdown and extra characters
2894
- extracted_name = re.sub(r'[*:\s]+, '', extracted_name)
2895
- extracted_name = re.sub(r'^[*:\s]+', '', extracted_name)
2896
- if extracted_name and len(extracted_name) > 1:
2897
- name = extracted_name
2898
  break
2899
-
2900
- # Helper function to extract clean fields
2901
- def extract_clean_field(patterns):
2902
- if isinstance(patterns, str):
2903
- patterns = [patterns]
2904
-
2905
- for pattern in patterns:
2906
- # Improved pattern with better capturing groups
2907
- match = re.search(rf'{pattern}[:\s]*([^\n*]+?)(?=\n|$)', content, re.IGNORECASE | re.DOTALL)
2908
- if match and match.group(1):
2909
- value = match.group(1).strip()
2910
- # Clean up the value
2911
- value = re.sub(r'^[-*•:\s]+', '', value)
2912
- value = re.sub(r'[*]+', '', value)
2913
- value = re.sub(r'\s+', ' ', value)
2914
- if value:
2915
- return value
2916
  return ""
2917
-
2918
  # Extract all fields with safer extraction
2919
  profile = CharacterProfile(
2920
  name=name,
 
2872
  return value.strip() if value else None
2873
  return None
2874
 
2875
+ from typing import List
2876
+
2877
  def _parse_character_profile(self, content: str, role: str) -> CharacterProfile:
2878
+ """Parse character profile from content and return CharacterProfile dataclass"""
2879
+ # --- 1. 로그 ---
2880
  logger.debug(f"Parsing character profile for role: {role}")
2881
  logger.debug(f"Content preview: {content[:200]}...")
2882
+
2883
+ # --- 2. 이름 추출 ---
2884
+ name = f"Character_{role}" # fallback
2885
  name_patterns = [
2886
+ r'(?:이름|Name)[:\s]*([^\n,(]+)', # 예: "이름: 홍길동"
2887
+ r'^\s*[-*•]\s*([^\n,(]+)', # 예: "- 홍길동"
2888
+ r'^([^\n,(]+)' # 문단 첫 단어
2889
  ]
2890
+ for pat in name_patterns:
2891
+ m = re.search(pat, content, re.IGNORECASE | re.MULTILINE)
2892
+ if m and m.group(1).strip():
2893
+ extracted = m.group(1).strip()
2894
+ # 마크다운 기호·불필요 문자 제거
2895
+ extracted = re.sub(r'[\*:\s]+', '', extracted)
2896
+ if len(extracted) > 1:
2897
+ name = extracted
 
 
2898
  break
2899
+
2900
+ # --- 3. 필드 추출 헬퍼 ---
2901
+ def extract_clean_field(patterns) -> str:
2902
+ patterns = [patterns] if isinstance(patterns, str) else patterns
2903
+ for p in patterns:
2904
+ m = re.search(rf'{p}[:\s]*([^\n*]+?)(?=\n|$)', content,
2905
+ re.IGNORECASE | re.DOTALL)
2906
+ if m and m.group(1).strip():
2907
+ val = m.group(1).strip()
2908
+ val = re.sub(r'^[-*•:\s]+', '', val)
2909
+ val = re.sub(r'\*+', '', val)
2910
+ val = re.sub(r'\s+', ' ', val)
2911
+ return val
 
 
 
 
2912
  return ""
2913
+
2914
  # Extract all fields with safer extraction
2915
  profile = CharacterProfile(
2916
  name=name,