Update app.py

app.py CHANGED

@@ -18,7 +18,7 @@ import pandas as pd
 import PyPDF2  # For handling PDF files
 from collections import Counter
 
-from openai import OpenAI, APIError, APITimeoutError
+from openai import OpenAI, APIError, APITimeoutError
 from gradio_client import Client
 from kaggle.api.kaggle_api_extended import KaggleApi
 import tempfile
@@ -30,10 +30,32 @@ import pyarrow.parquet as pq
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 
-# ───
+# ─── Libraries for tolerating network exceptions ─────────────────────
 import httpx
 from httpx import RemoteProtocolError
-
+
+# ❸ If the `backoff` module is missing, substitute an ad-hoc implementation
+try:
+    import backoff
+except ImportError:  # ← occurs on a first-run environment
+    logging.warning("`backoff` module not found; using a simple substitute decorator.")
+    def _simple_backoff_on_exception(exc_tuple, max_tries=3, base=2):
+        def decorator(fn):
+            def wrapper(*args, **kwargs):
+                for attempt in range(1, max_tries + 1):
+                    try:
+                        return fn(*args, **kwargs)
+                    except exc_tuple as e:
+                        if attempt == max_tries:
+                            raise
+                        sleep = base ** attempt
+                        logging.info(f"Retry {attempt}/{max_tries} after {sleep}s ({e})")
+                        time.sleep(sleep)
+            return wrapper
+        return decorator
+    class _DummyBackoff:
+        on_exception = staticmethod(_simple_backoff_on_exception)  # staticmethod so instance access does not inject self
+    backoff = _DummyBackoff()  # provides the same API
 
 # ─────────────────────────────── Environment Variables / Constants ───────────────────────────
 
@@ -49,12 +71,9 @@ if not (KAGGLE_USERNAME and KAGGLE_KEY):
 os.environ["KAGGLE_USERNAME"] = KAGGLE_USERNAME
 os.environ["KAGGLE_KEY"] = KAGGLE_KEY
 
-# ❶ Enable when some proxies require forcing HTTP/2 down to HTTP/1.1
-# os.environ["HTTPX_FORCE_HTTP1"] = "true"
-
 BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
 IMAGE_API_URL = "http://211.233.58.201:7896"  # image-generation API
-MAX_TOKENS =
+MAX_TOKENS = 4096  # safe token limit
 
 # ─────────────────────────────── Logging ───────────────────────────────
 logging.basicConfig(
@@ -2038,21 +2057,22 @@ def process_example(topic):
 def process_input(prompt: str, uploaded_files):
     """
     Take the main chat input and generate design/invention ideas.
-    Streaming
-
+    On a streaming failure (RemoteProtocolError, etc.), retry with backoff,
+    then fall back to a final non-stream call.
     """
+    # ─── Prevent duplicate chat history ──────────────────────────
    if not any(m["role"] == "user" and m["content"] == prompt for m in st.session_state.messages):
         st.session_state.messages.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
             st.markdown(prompt)
 
-    # Prevent duplicate prompt-response pairs
     for i in range(len(st.session_state.messages) - 1):
         if (st.session_state.messages[i]["role"] == "user"
                 and st.session_state.messages[i]["content"] == prompt
                 and st.session_state.messages[i + 1]["role"] == "assistant"):
             return
 
+    # ─── Generate the result ─────────────────────────────────────
     with st.chat_message("assistant"):
         status = st.status("Preparing to generate invention ideas…")
         stream_placeholder = st.empty()
@@ -2064,8 +2084,6 @@ def process_input(prompt: str, uploaded_files):
 
         selected_cat = st.session_state.get("category_focus", None)
         selected_frameworks = st.session_state.get("selected_frameworks", [])
-
-        # The goal here is "design/invention", so switch the system prompt
         sys_prompt = get_idea_system_prompt(
             selected_category=selected_cat,
             selected_frameworks=selected_frameworks
@@ -2080,9 +2098,9 @@ def process_input(prompt: str, uploaded_files):
         use_kaggle = st.session_state.kaggle_enabled
         has_uploaded = bool(uploaded_files)
 
-        search_content = kaggle_content = file_content = mil_content = None
+        search_content = kaggle_content = file_content = mil_content = None
 
-        # ①
+        # ① Web search
         if use_web_search:
             status.update(label="Searching the web…")
             with st.spinner("Searching…"):
@@ -2117,7 +2135,7 @@ def process_input(prompt: str, uploaded_files):
             with st.spinner("Processing files…"):
                 file_content = process_uploaded_files(uploaded_files)
 
-        # ④ Military tactics data
+        # ④ Military tactics data
         if is_military_query(prompt):
             status.update(label="Searching military tactics dataset…")
             with st.spinner("Loading military insights…"):
@@ -2132,13 +2150,13 @@ def process_input(prompt: str, uploaded_files):
                     f"**Defense Reasoning:** {row['defense_reasoning']}\n\n---\n"
                 )
 
-        #
+        # ─── Compose the user content ────────────────────────────
         user_content = prompt
-        for
-        if
-        user_content += "\n\n" +
+        for extra in (search_content, kaggle_content, file_content, mil_content):
+            if extra:
+                user_content += "\n\n" + extra
 
-        # Internal analysis
+        # ─── Internal analysis ───────────────────────────────────
         status.update(label="Analyzing…")
         decision_purpose = identify_decision_purpose(prompt)
         relevance_scores = compute_relevance_scores(prompt, PHYS_CATEGORIES)
@@ -2173,7 +2191,7 @@ def process_input(prompt: str, uploaded_files):
         for c, s in decision_purpose['constraints']:
             purpose_info += f"- **{c}** (relevance: {s})\n"
 
-        #
+        # ─── Framework analysis (optional) ───────────────────────
         framework_contents = []
         for fw in selected_frameworks:
             if fw == "swot":
@@ -2188,7 +2206,6 @@ def process_input(prompt: str, uploaded_files):
             framework_contents.append(
                 format_business_framework_analysis("bcg", analyze_with_bcg(prompt))
             )
-        # Add sunzi and others as needed
 
         if framework_contents:
             user_content += "\n\n## (Optional) Additional framework analyses\n\n" + "\n\n".join(framework_contents)
@@ -2203,15 +2220,11 @@ def process_input(prompt: str, uploaded_files):
             {"role": "user", "content": user_content},
         ]
 
-        #
-        # ⬇️ 1. Safe streaming call (retried with backoff)
+        # ─── OpenAI chat call (retried with backoff) ─────────────
         @backoff.on_exception(
-
-            (RemoteProtocolError, APITimeoutError, APIError),
-            max_tries=3,
-            jitter=None
+            (RemoteProtocolError, APITimeoutError, APIError), max_tries=3
         )
-        def
+        def safe_stream():
             return client.chat.completions.create(
                 model="gpt-4.1-mini",
                 messages=api_messages,
@@ -2222,13 +2235,13 @@ def process_input(prompt: str, uploaded_files):
             )
 
         try:
-            stream =
+            stream = safe_stream()
             for chunk in stream:
                 if chunk.choices and chunk.choices[0].delta.content:
                     full_response += chunk.choices[0].delta.content
                     stream_placeholder.markdown(full_response + "▌")
         except (RemoteProtocolError, APITimeoutError, APIError) as stream_err:
-            logging.warning(f"
+            logging.warning(f"Streaming failed; falling back to non-stream: {stream_err}")
             resp = client.chat.completions.create(
                 model="gpt-4.1-mini",
                 messages=api_messages,
@@ -2239,11 +2252,10 @@ def process_input(prompt: str, uploaded_files):
             )
             full_response = resp.choices[0].message.content
             stream_placeholder.markdown(full_response)
-        # ─────────────────────────────────────────────────────────────
 
         status.update(label="Invention ideas created!", state="complete")
 
-        #
+        # ─── Image generation ────────────────────────────────────
         img_data = img_caption = None
         if st.session_state.generate_image and full_response:
             match = re.search(r"###\s*이미지\s*프롬프트\s*\n+([^\n]+)", full_response, re.I)
@@ -2256,7 +2268,7 @@ def process_input(prompt: str, uploaded_files):
             if img_data:
                 st.image(img_data, caption=f"Visualized Concept – {img_caption}")
 
-        # Session messages
+        # ─── Save session messages ───────────────────────────────
         answer_msg = {"role": "assistant", "content": full_response}
         if img_data:
             answer_msg["image"] = img_data
@@ -2264,7 +2276,7 @@ def process_input(prompt: str, uploaded_files):
         st.session_state["_skip_dup_idx"] = len(st.session_state.messages)
         st.session_state.messages.append(answer_msg)
 
-        # Download
+        # ─── Download options ────────────────────────────────────
         st.subheader("Download This Output")
         col_md, col_html = st.columns(2)
         col_md.download_button(
@@ -2292,6 +2304,7 @@ def process_input(prompt: str, uploaded_files):
                 {"role": "assistant", "content": f"⚠️ Error: {e}"}
             )
 
+
 def main():
     idea_generator_app()
 
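For reference, the substitute decorator added in the second hunk retries the wrapped call on the given exception tuple, sleeps base ** attempt seconds between attempts, and re-raises once the final try fails. Below is a minimal standalone sketch of that behavior: the decorator body is copied from the hunk, while flaky_fetch, its failure counter, and base=0 (used only to keep the demo instant) are hypothetical, for illustration.

    import logging
    import time

    logging.basicConfig(level=logging.INFO)

    # Copy of the substitute decorator from the diff above.
    def _simple_backoff_on_exception(exc_tuple, max_tries=3, base=2):
        def decorator(fn):
            def wrapper(*args, **kwargs):
                for attempt in range(1, max_tries + 1):
                    try:
                        return fn(*args, **kwargs)
                    except exc_tuple as e:
                        if attempt == max_tries:
                            raise
                        sleep = base ** attempt
                        logging.info(f"Retry {attempt}/{max_tries} after {sleep}s ({e})")
                        time.sleep(sleep)
            return wrapper
        return decorator

    # Hypothetical flaky callable: fails twice, then succeeds on the third try.
    _calls = {"n": 0}

    @_simple_backoff_on_exception((ConnectionError,), max_tries=3, base=0)
    def flaky_fetch():
        _calls["n"] += 1
        if _calls["n"] < 3:
            raise ConnectionError("simulated network hiccup")
        return "ok"

    print(flaky_fetch())  # logs two retries, then prints "ok"

One caveat: the real backoff.on_exception takes a wait generator as its first argument (e.g. backoff.on_exception(backoff.expo, (...), max_tries=3)), so the substitute's (exc_tuple, max_tries, base) form is not a drop-in match when the library is actually installed; the decorator call later in the diff is written against the substitute's form.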
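The try/except around safe_stream() implements a stream-first pattern: render tokens incrementally, and if the stream breaks, reissue the same request as a single blocking call. A minimal sketch of the same pattern outside Streamlit, assuming a valid OPENAI_API_KEY in the environment plus the openai and httpx packages (the model name and exception tuple follow the diff; the prompt text is hypothetical):

    import logging

    from httpx import RemoteProtocolError
    from openai import OpenAI, APIError, APITimeoutError

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    api_messages = [{"role": "user", "content": "Suggest one bicycle-related invention idea."}]
    full_response = ""

    try:
        # First attempt: stream tokens as they arrive.
        stream = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=api_messages,
            stream=True,
        )
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                full_response += chunk.choices[0].delta.content
    except (RemoteProtocolError, APITimeoutError, APIError) as stream_err:
        # Fallback: one blocking, non-stream call with the same messages.
        logging.warning(f"Streaming failed; falling back to non-stream: {stream_err}")
        resp = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=api_messages,
            stream=False,
        )
        full_response = resp.choices[0].message.content

    print(full_response)

Note that a mid-stream failure discards any partially received text and re-asks from scratch, which mirrors the diff: full_response is rebuilt entirely from the fallback response.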