Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -15,10 +15,10 @@ from typing import Iterator
|
|
15 |
|
16 |
import streamlit as st
|
17 |
import pandas as pd
|
18 |
-
import PyPDF2
|
19 |
from collections import Counter
|
20 |
|
21 |
-
from openai import OpenAI
|
22 |
from gradio_client import Client
|
23 |
from kaggle.api.kaggle_api_extended import KaggleApi
|
24 |
import tempfile
|
@@ -30,6 +30,11 @@ import pyarrow.parquet as pq
|
|
30 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
31 |
from sklearn.metrics.pairwise import cosine_similarity
|
32 |
|
|
|
|
|
|
|
|
|
|
|
33 |
# ─────────────────────────────── Environment Variables / Constants ─────────────────────────
|
34 |
|
35 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
@@ -44,9 +49,12 @@ if not (KAGGLE_USERNAME and KAGGLE_KEY):
|
|
44 |
os.environ["KAGGLE_USERNAME"] = KAGGLE_USERNAME
|
45 |
os.environ["KAGGLE_KEY"] = KAGGLE_KEY
|
46 |
|
|
|
|
|
|
|
47 |
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
48 |
IMAGE_API_URL = "http://211.233.58.201:7896" # μμ μ΄λ―Έμ§ μμ±μ© API
|
49 |
-
MAX_TOKENS = 7999
|
50 |
|
51 |
# ─────────────────────────────── Logging ───────────────────────────────
|
52 |
logging.basicConfig(
|
@@ -54,6 +62,7 @@ logging.basicConfig(
|
|
54 |
format="%(asctime)s - %(levelname)s - %(message)s"
|
55 |
)
|
56 |
|
|
|
57 |
# βββββββββββββββββββββββββββββββ κ΅°μ¬(λ°λ¦¬ν°λ¦¬) μ μ λ°μ΄ν°μ
λ‘λ βββββββββββββββββ
|
58 |
@st.cache_resource
|
59 |
def load_military_dataset():
|
@@ -2029,12 +2038,15 @@ def process_example(topic):
|
|
2029 |
def process_input(prompt: str, uploaded_files):
|
2030 |
"""
|
2031 |
메인 채팅 입력을 받아 디자인/발명 아이디어를 생성한다.
|
|
|
|
|
2032 |
"""
|
2033 |
if not any(m["role"] == "user" and m["content"] == prompt for m in st.session_state.messages):
|
2034 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
2035 |
with st.chat_message("user"):
|
2036 |
st.markdown(prompt)
|
2037 |
|
|
|
2038 |
for i in range(len(st.session_state.messages) - 1):
|
2039 |
if (st.session_state.messages[i]["role"] == "user"
|
2040 |
and st.session_state.messages[i]["content"] == prompt
|
@@ -2050,7 +2062,7 @@ def process_input(prompt: str, uploaded_files):
|
|
2050 |
client = get_openai_client()
|
2051 |
status.update(label="Initializing modelβ¦")
|
2052 |
|
2053 |
-
selected_cat
|
2054 |
selected_frameworks = st.session_state.get("selected_frameworks", [])
|
2055 |
|
2056 |
# 목적이 "디자인/발명"이므로, system prompt 변경
|
@@ -2068,9 +2080,7 @@ def process_input(prompt: str, uploaded_files):
|
|
2068 |
use_kaggle = st.session_state.kaggle_enabled
|
2069 |
has_uploaded = bool(uploaded_files)
|
2070 |
|
2071 |
-
search_content
|
2072 |
-
kaggle_content = None
|
2073 |
-
file_content = None
|
2074 |
|
2075 |
# ① 웹검색
|
2076 |
if use_web_search:
|
@@ -2108,7 +2118,6 @@ def process_input(prompt: str, uploaded_files):
|
|
2108 |
file_content = process_uploaded_files(uploaded_files)
|
2109 |
|
2110 |
# ④ 군사 전술 데이터 (필요 시)
|
2111 |
-
mil_content = None
|
2112 |
if is_military_query(prompt):
|
2113 |
status.update(label="Searching military tactics datasetβ¦")
|
2114 |
with st.spinner("Loading military insightsβ¦"):
|
@@ -2123,15 +2132,11 @@ def process_input(prompt: str, uploaded_files):
|
|
2123 |
f"**Defense Reasoning:** {row['defense_reasoning']}\n\n---\n"
|
2124 |
)
|
2125 |
|
|
|
2126 |
user_content = prompt
|
2127 |
-
|
2128 |
-
|
2129 |
-
|
2130 |
-
user_content += "\n\n" + kaggle_content
|
2131 |
-
if file_content:
|
2132 |
-
user_content += "\n\n" + file_content
|
2133 |
-
if mil_content:
|
2134 |
-
user_content += "\n\n" + mil_content
|
2135 |
|
2136 |
# 내부 분석
|
2137 |
status.update(label="λΆμ μ€β¦")
|
@@ -2168,22 +2173,22 @@ def process_input(prompt: str, uploaded_files):
|
|
2168 |
for c, s in decision_purpose['constraints']:
|
2169 |
purpose_info += f"- **{c}** (κ΄λ ¨μ±: {s})\n"
|
2170 |
|
2171 |
-
# 프레임워크
|
2172 |
framework_contents = []
|
2173 |
for fw in selected_frameworks:
|
2174 |
if fw == "swot":
|
2175 |
-
|
2176 |
-
|
|
|
2177 |
elif fw == "porter":
|
2178 |
-
|
2179 |
-
|
|
|
2180 |
elif fw == "bcg":
|
2181 |
-
|
2182 |
-
|
2183 |
-
|
2184 |
-
|
2185 |
-
# (실제로는 별도 로직이 필요하나 여기서 생략)
|
2186 |
-
pass
|
2187 |
|
2188 |
if framework_contents:
|
2189 |
user_content += "\n\n## (Optional) κΈ°ν νλ μμν¬ λΆμ\n\n" + "\n\n".join(framework_contents)
|
@@ -2191,29 +2196,54 @@ def process_input(prompt: str, uploaded_files):
|
|
2191 |
user_content += f"\n\n## μΉ΄ν
κ³ λ¦¬ λ§€νΈλ¦μ€ λΆμ{purpose_info}\n{combos_table}"
|
2192 |
|
2193 |
status.update(label="Generating final design/invention ideasβ¦")
|
|
|
2194 |
api_messages = [
|
2195 |
{"role": "system", "content": sys_prompt},
|
2196 |
{"role": "system", "name": "category_db", "content": category_context(selected_cat)},
|
2197 |
{"role": "user", "content": user_content},
|
2198 |
]
|
2199 |
-
|
2200 |
-
|
2201 |
-
|
2202 |
-
|
2203 |
-
|
2204 |
-
|
2205 |
-
|
|
|
2206 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2207 |
|
2208 |
-
|
2209 |
-
|
2210 |
-
|
2211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2212 |
|
2213 |
-
stream_placeholder.markdown(full_response)
|
2214 |
status.update(label="Invention ideas created!", state="complete")
|
2215 |
|
2216 |
-
# 이미지 생성
|
2217 |
img_data = img_caption = None
|
2218 |
if st.session_state.generate_image and full_response:
|
2219 |
match = re.search(r"###\s*μ΄λ―Έμ§\s*ν둬ννΈ\s*\n+([^\n]+)", full_response, re.I)
|
@@ -2226,6 +2256,7 @@ def process_input(prompt: str, uploaded_files):
|
|
2226 |
if img_data:
|
2227 |
st.image(img_data, caption=f"Visualized Concept β {img_caption}")
|
2228 |
|
|
|
2229 |
answer_msg = {"role": "assistant", "content": full_response}
|
2230 |
if img_data:
|
2231 |
answer_msg["image"] = img_data
|
|
|
15 |
|
16 |
import streamlit as st
|
17 |
import pandas as pd
|
18 |
+
import PyPDF2 # For handling PDF files
|
19 |
from collections import Counter
|
20 |
|
21 |
+
from openai import OpenAI, APIError, APITimeoutError # ⬅️ OpenAI 오류형 포함
|
22 |
from gradio_client import Client
|
23 |
from kaggle.api.kaggle_api_extended import KaggleApi
|
24 |
import tempfile
|
|
|
30 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
31 |
from sklearn.metrics.pairwise import cosine_similarity
|
32 |
|
33 |
+
# βββ NEW : λ€νΈμν¬ μμ νμ© λΌμ΄λΈλ¬λ¦¬ ββββββββββββββββββββββββββββββ
|
34 |
+
import httpx
|
35 |
+
from httpx import RemoteProtocolError
|
36 |
+
import backoff # ⬅️ 자동 재시도 데코레이터
|
37 |
+
|
38 |
# ─────────────────────────────── Environment Variables / Constants ─────────────────────────
|
39 |
|
40 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
|
|
49 |
os.environ["KAGGLE_USERNAME"] = KAGGLE_USERNAME
|
50 |
os.environ["KAGGLE_KEY"] = KAGGLE_KEY
|
51 |
|
52 |
+
# ▶ 일부 프록시에서 HTTP/2 → 1.1 강제 전환이 필요할 때 활성화
|
53 |
+
# os.environ["HTTPX_FORCE_HTTP1"] = "true"
|
54 |
+
|
55 |
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
56 |
IMAGE_API_URL = "http://211.233.58.201:7896" # μμ μ΄λ―Έμ§ μμ±μ© API
|
57 |
+
MAX_TOKENS = 7999
|
58 |
|
59 |
# ─────────────────────────────── Logging ───────────────────────────────
|
60 |
logging.basicConfig(
|
|
|
62 |
format="%(asctime)s - %(levelname)s - %(message)s"
|
63 |
)
|
64 |
|
65 |
+
|
66 |
# βββββββββββββββββββββββββββββββ κ΅°μ¬(λ°λ¦¬ν°λ¦¬) μ μ λ°μ΄ν°μ
λ‘λ βββββββββββββββββ
|
67 |
@st.cache_resource
|
68 |
def load_military_dataset():
|
|
|
2038 |
def process_input(prompt: str, uploaded_files):
|
2039 |
"""
|
2040 |
메인 채팅 입력을 받아 디자인/발명 아이디어를 생성한다.
|
2041 |
+
스트리밍 중단(HTTPX RemoteProtocolError 등) 시 자동 재시도,
|
2042 |
+
최종 실패 시 non-stream 호출로 폴백하도록 개선.
|
2043 |
"""
|
2044 |
if not any(m["role"] == "user" and m["content"] == prompt for m in st.session_state.messages):
|
2045 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
2046 |
with st.chat_message("user"):
|
2047 |
st.markdown(prompt)
|
2048 |
|
2049 |
+
# 동일 프롬프트-응답 중복 방지
|
2050 |
for i in range(len(st.session_state.messages) - 1):
|
2051 |
if (st.session_state.messages[i]["role"] == "user"
|
2052 |
and st.session_state.messages[i]["content"] == prompt
|
|
|
2062 |
client = get_openai_client()
|
2063 |
status.update(label="Initializing modelβ¦")
|
2064 |
|
2065 |
+
selected_cat = st.session_state.get("category_focus", None)
|
2066 |
selected_frameworks = st.session_state.get("selected_frameworks", [])
|
2067 |
|
2068 |
# 목적이 "디자인/발명"이므로, system prompt 변경
|
|
|
2080 |
use_kaggle = st.session_state.kaggle_enabled
|
2081 |
has_uploaded = bool(uploaded_files)
|
2082 |
|
2083 |
+
search_content = kaggle_content = file_content = mil_content = None # 초기화
|
|
|
|
|
2084 |
|
2085 |
# ① 웹검색
|
2086 |
if use_web_search:
|
|
|
2118 |
file_content = process_uploaded_files(uploaded_files)
|
2119 |
|
2120 |
# ④ 군사 전술 데이터 (필요 시)
|
|
|
2121 |
if is_military_query(prompt):
|
2122 |
status.update(label="Searching military tactics datasetβ¦")
|
2123 |
with st.spinner("Loading military insightsβ¦"):
|
|
|
2132 |
f"**Defense Reasoning:** {row['defense_reasoning']}\n\n---\n"
|
2133 |
)
|
2134 |
|
2135 |
+
# 사용자 프롬프트 + 참조자료 결합
|
2136 |
user_content = prompt
|
2137 |
+
for _extra in (search_content, kaggle_content, file_content, mil_content):
|
2138 |
+
if _extra:
|
2139 |
+
user_content += "\n\n" + _extra
|
|
|
|
|
|
|
|
|
|
|
2140 |
|
2141 |
# 내부 분석
|
2142 |
status.update(label="λΆμ μ€β¦")
|
|
|
2173 |
for c, s in decision_purpose['constraints']:
|
2174 |
purpose_info += f"- **{c}** (κ΄λ ¨μ±: {s})\n"
|
2175 |
|
2176 |
+
# 기타 프레임워크
|
2177 |
framework_contents = []
|
2178 |
for fw in selected_frameworks:
|
2179 |
if fw == "swot":
|
2180 |
+
framework_contents.append(
|
2181 |
+
format_business_framework_analysis("swot", analyze_with_swot(prompt))
|
2182 |
+
)
|
2183 |
elif fw == "porter":
|
2184 |
+
framework_contents.append(
|
2185 |
+
format_business_framework_analysis("porter", analyze_with_porter(prompt))
|
2186 |
+
)
|
2187 |
elif fw == "bcg":
|
2188 |
+
framework_contents.append(
|
2189 |
+
format_business_framework_analysis("bcg", analyze_with_bcg(prompt))
|
2190 |
+
)
|
2191 |
+
# sunzi 등 필요 시 추가
|
|
|
|
|
2192 |
|
2193 |
if framework_contents:
|
2194 |
user_content += "\n\n## (Optional) κΈ°ν νλ μμν¬ λΆμ\n\n" + "\n\n".join(framework_contents)
|
|
|
2196 |
user_content += f"\n\n## μΉ΄ν
κ³ λ¦¬ λ§€νΈλ¦μ€ λΆμ{purpose_info}\n{combos_table}"
|
2197 |
|
2198 |
status.update(label="Generating final design/invention ideasβ¦")
|
2199 |
+
|
2200 |
api_messages = [
|
2201 |
{"role": "system", "content": sys_prompt},
|
2202 |
{"role": "system", "name": "category_db", "content": category_context(selected_cat)},
|
2203 |
{"role": "user", "content": user_content},
|
2204 |
]
|
2205 |
+
|
2206 |
+
# ─────────────────────────────────────────────────────────────
|
2207 |
+
# ⬇️ 1. 안전한 스트리밍 호출 (backoff 재시도)
|
2208 |
+
@backoff.on_exception(
|
2209 |
+
backoff.expo,
|
2210 |
+
(RemoteProtocolError, APITimeoutError, APIError),
|
2211 |
+
max_tries=3,
|
2212 |
+
jitter=None
|
2213 |
)
|
2214 |
+
def safe_stream_chat():
|
2215 |
+
return client.chat.completions.create(
|
2216 |
+
model="gpt-4.1-mini",
|
2217 |
+
messages=api_messages,
|
2218 |
+
temperature=1,
|
2219 |
+
max_tokens=MAX_TOKENS,
|
2220 |
+
top_p=1,
|
2221 |
+
stream=True
|
2222 |
+
)
|
2223 |
|
2224 |
+
try:
|
2225 |
+
stream = safe_stream_chat()
|
2226 |
+
for chunk in stream:
|
2227 |
+
if chunk.choices and chunk.choices[0].delta.content:
|
2228 |
+
full_response += chunk.choices[0].delta.content
|
2229 |
+
stream_placeholder.markdown(full_response + "β")
|
2230 |
+
except (RemoteProtocolError, APITimeoutError, APIError) as stream_err:
|
2231 |
+
logging.warning(f"π‘ μ€νΈλ¦¬λ° μ€λ¨, non-streamμΌλ‘ ν΄λ°±: {stream_err}")
|
2232 |
+
resp = client.chat.completions.create(
|
2233 |
+
model="gpt-4.1-mini",
|
2234 |
+
messages=api_messages,
|
2235 |
+
temperature=1,
|
2236 |
+
max_tokens=MAX_TOKENS,
|
2237 |
+
top_p=1,
|
2238 |
+
stream=False
|
2239 |
+
)
|
2240 |
+
full_response = resp.choices[0].message.content
|
2241 |
+
stream_placeholder.markdown(full_response)
|
2242 |
+
# ─────────────────────────────────────────────────────────────
|
2243 |
|
|
|
2244 |
status.update(label="Invention ideas created!", state="complete")
|
2245 |
|
2246 |
+
# 자동 이미지 생성
|
2247 |
img_data = img_caption = None
|
2248 |
if st.session_state.generate_image and full_response:
|
2249 |
match = re.search(r"###\s*μ΄λ―Έμ§\s*ν둬ννΈ\s*\n+([^\n]+)", full_response, re.I)
|
|
|
2256 |
if img_data:
|
2257 |
st.image(img_data, caption=f"Visualized Concept β {img_caption}")
|
2258 |
|
2259 |
+
# 세션 메시지 업데이트
|
2260 |
answer_msg = {"role": "assistant", "content": full_response}
|
2261 |
if img_data:
|
2262 |
answer_msg["image"] = img_data
|