openfree committed on
Commit
79ca6ae
·
verified ·
1 Parent(s): 2199c30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -32
app.py CHANGED
@@ -1891,25 +1891,38 @@ You provide feedback that's critical yet encouraging."""
1891
 
1892
  def _parse_character_profile(self, content: str, role: str) -> CharacterProfile:
1893
  """Parse character profile from content"""
1894
- # Extract name first
1895
- name_match = re.search(r'(?:์ด๋ฆ„|Name)[:\s&]*\s*([^,\n]+?)(?:\s*\([^)]+\))?\s*,?\s*\d*์„ธ?', content, re.IGNORECASE)
1896
- if name_match:
1897
- name = name_match.group(1).strip()
1898
- # Remove markdown formatting
1899
- name = re.sub(r'\*+', '', name)
1900
- else:
1901
- name = f"Character_{role}"
 
 
 
1902
 
1903
- # Extract age with better parsing - handle various formats
 
 
 
 
 
 
 
 
 
 
 
1904
  age = 30 # default age
1905
-
1906
- # Try different patterns to extract age
1907
  age_patterns = [
1908
- r'(\d+)\s*์„ธ', # 17์„ธ
1909
- r'(\d+)\s*์‚ด', # 17์‚ด
1910
- r'(\d+)\s*years?\s*old', # 17 years old
1911
- r'[,\s]\s*(\d+)\s*[,\s]', # , 17 ,
1912
- r'\((\d+)\)', # (17)
 
1913
  ]
1914
 
1915
  for pattern in age_patterns:
@@ -1919,33 +1932,74 @@ You provide feedback that's critical yet encouraging."""
1919
  extracted_age = int(age_match.group(1))
1920
  if 10 <= extracted_age <= 100: # Reasonable age range
1921
  age = extracted_age
 
1922
  break
1923
  except ValueError:
1924
  continue
1925
 
1926
- # Extract other fields with cleaner extraction
1927
- def extract_clean_field(pattern):
1928
- match = re.search(rf'{pattern}[:\s]*([^\n*]+)', content, re.IGNORECASE)
1929
- if match:
1930
- value = match.group(1).strip()
1931
- # Remove markdown and trailing punctuation
1932
- value = re.sub(r'\*+', '', value)
1933
- value = re.sub(r'[,.:;]$', '', value)
1934
- return value.strip()
 
 
 
 
 
 
1935
  return ""
1936
 
1937
- return CharacterProfile(
 
1938
  name=name,
1939
  age=age,
1940
  role=role,
1941
- archetype=extract_clean_field(r"(?:์บ๋ฆญํ„ฐ ์•„ํฌํƒ€์ž…|Character Archetype|Archetype)"),
1942
- want=extract_clean_field(r"(?:WANT|์™ธ์  ๋ชฉํ‘œ)"),
1943
- need=extract_clean_field(r"(?:NEED|๋‚ด์  ํ•„์š”)"),
1944
- backstory=extract_clean_field(r"(?:๋ฐฑ์Šคํ† ๋ฆฌ|Backstory|ํ•ต์‹ฌ ์ƒ์ฒ˜)"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1945
  personality=self._extract_personality_traits(content),
1946
- speech_pattern=extract_clean_field(r"(?:๋งํˆฌ.*?ํŒจํ„ด|Speech Pattern)"),
1947
- character_arc=extract_clean_field(r"(?:์บ๋ฆญํ„ฐ ์•„ํฌ|Character Arc|Arc)")
 
 
 
 
 
 
 
 
 
 
1948
  )
 
 
 
1949
 
1950
  def _extract_field(self, content: str, field_pattern: str) -> Optional[str]:
1951
  """Extract field value from content with improved parsing"""
 
1891
 
1892
  def _parse_character_profile(self, content: str, role: str) -> CharacterProfile:
1893
  """Parse character profile from content"""
1894
+ # Debug logging
1895
+ logger.debug(f"Parsing character profile for role: {role}")
1896
+ logger.debug(f"Content preview: {content[:200]}...")
1897
+
1898
+ # Extract name first - handle various formats
1899
+ name = f"Character_{role}" # default
1900
+ name_patterns = [
1901
+ r'(?:์ด๋ฆ„|Name)[:\s]*([^,\n]+?)(?:\s*\([^)]+\))?\s*,?\s*\d*์„ธ?',
1902
+ r'^\s*[-*โ€ข]\s*([^,\n]+?)(?:\s*\([^)]+\))?\s*,?\s*\d*์„ธ?',
1903
+ r'^([^,\n]+?)(?:\s*\([^)]+\))?\s*,?\s*\d*์„ธ?'
1904
+ ]
1905
 
1906
+ for pattern in name_patterns:
1907
+ name_match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE)
1908
+ if name_match:
1909
+ extracted_name = name_match.group(1).strip()
1910
+ # Remove markdown and extra characters
1911
+ extracted_name = re.sub(r'[*:\s]+$', '', extracted_name)
1912
+ extracted_name = re.sub(r'^[*:\s]+', '', extracted_name)
1913
+ if extracted_name and len(extracted_name) > 1:
1914
+ name = extracted_name
1915
+ break
1916
+
1917
+ # Extract age with multiple patterns
1918
  age = 30 # default age
 
 
1919
  age_patterns = [
1920
+ r'(\d+)\s*์„ธ',
1921
+ r'(\d+)\s*์‚ด',
1922
+ r',\s*(\d+)\s*[,\s]',
1923
+ r'\((\d+)\)',
1924
+ r'Age[:\s]*(\d+)',
1925
+ r'๋‚˜์ด[:\s]*(\d+)'
1926
  ]
1927
 
1928
  for pattern in age_patterns:
 
1932
  extracted_age = int(age_match.group(1))
1933
  if 10 <= extracted_age <= 100: # Reasonable age range
1934
  age = extracted_age
1935
+ logger.debug(f"Extracted age: {age}")
1936
  break
1937
  except ValueError:
1938
  continue
1939
 
1940
+ # Helper function to extract clean fields
1941
+ def extract_clean_field(patterns):
1942
+ if isinstance(patterns, str):
1943
+ patterns = [patterns]
1944
+
1945
+ for pattern in patterns:
1946
+ match = re.search(rf'{pattern}[:\s]*([^\n*]+?)(?=\n|$)', content, re.IGNORECASE | re.DOTALL)
1947
+ if match:
1948
+ value = match.group(1).strip()
1949
+ # Clean up the value
1950
+ value = re.sub(r'^[-*โ€ข:\s]+', '', value)
1951
+ value = re.sub(r'[*]+', '', value)
1952
+ value = re.sub(r'\s+', ' ', value)
1953
+ if value:
1954
+ return value
1955
  return ""
1956
 
1957
+ # Extract all fields
1958
+ profile = CharacterProfile(
1959
  name=name,
1960
  age=age,
1961
  role=role,
1962
+ archetype=extract_clean_field([
1963
+ r"์บ๋ฆญํ„ฐ ์•„ํฌํƒ€์ž…",
1964
+ r"Character Archetype",
1965
+ r"Archetype",
1966
+ r"์•„ํฌํƒ€์ž…"
1967
+ ]),
1968
+ want=extract_clean_field([
1969
+ r"WANT\s*\(์™ธ์  ๋ชฉํ‘œ\)",
1970
+ r"WANT",
1971
+ r"์™ธ์  ๋ชฉํ‘œ",
1972
+ r"External Goal"
1973
+ ]),
1974
+ need=extract_clean_field([
1975
+ r"NEED\s*\(๋‚ด์  ํ•„์š”\)",
1976
+ r"NEED",
1977
+ r"๋‚ด์  ํ•„์š”",
1978
+ r"Internal Need"
1979
+ ]),
1980
+ backstory=extract_clean_field([
1981
+ r"๋ฐฑ์Šคํ† ๋ฆฌ",
1982
+ r"Backstory",
1983
+ r"ํ•ต์‹ฌ ์ƒ์ฒ˜",
1984
+ r"Core Wound"
1985
+ ]),
1986
  personality=self._extract_personality_traits(content),
1987
+ speech_pattern=extract_clean_field([
1988
+ r"๋งํˆฌ.*?ํŒจํ„ด",
1989
+ r"Speech Pattern",
1990
+ r"์–ธ์–ด ํŒจํ„ด",
1991
+ r"๋งํˆฌ"
1992
+ ]),
1993
+ character_arc=extract_clean_field([
1994
+ r"์บ๋ฆญํ„ฐ ์•„ํฌ",
1995
+ r"Character Arc",
1996
+ r"Arc",
1997
+ r"๋ณ€ํ™”"
1998
+ ])
1999
  )
2000
+
2001
+ logger.debug(f"Parsed character: {profile.name}, age: {profile.age}")
2002
+ return profile
2003
 
2004
  def _extract_field(self, content: str, field_pattern: str) -> Optional[str]:
2005
  """Extract field value from content with improved parsing"""