Commit 1bf0035
Parent(s): 3949ea1

progress more (back to 3.21)

app.py CHANGED
@@ -133,42 +133,43 @@ def fuzzy_deduplicate(df, column, threshold=65):
             seen_texts.append(text)
             indices_to_keep.append(i)
     return df.iloc[indices_to_keep]
+
+
 def translate_text(llm, text):
     try:
-        # Debug print
-        st.write(f"Debug - Model type: {type(llm)}")
-        st.write(f"Debug - Model attributes: {dir(llm)}")
-
-        messages = [
-            {"role": "system", "content": "You are a translator. Translate the given Russian text to English accurately and concisely."},
-            {"role": "user", "content": f"Translate this Russian text to English: {text}"}
-        ]
-
-        # For different model types, we'll use different approaches
         if isinstance(llm, ChatOpenAI):
-
-
-
-
-
-
-
-
-
-
-
-
-
-            except Exception as e:
-                st.error(f"Translation API error: {str(e)}")
-                return text
+            # Handle OpenAI-compatible API calls (Groq, OpenAI)
+            messages = [
+                {"role": "system", "content": "You are a translator. Translate the given Russian text to English accurately and concisely."},
+                {"role": "user", "content": f"Translate this Russian text to English: {text}"}
+            ]
+            response = llm.invoke(messages)
+
+            if hasattr(response, 'content'):
+                return response.content.strip()
+            elif isinstance(response, str):
+                return response.strip()
+            else:
+                return str(response).strip()
         else:
-
-
+            # For Qwen pipeline
+            messages = [
+                {"role": "system", "content": "You are a translator. Translate the given Russian text to English accurately and concisely."},
+                {"role": "user", "content": f"Translate this Russian text to English: {text}"}
+            ]
+
+            # Generate response using pipeline
+            response = llm(messages, max_length=512, num_return_sequences=1)[0]['generated_text']
+
+            # Extract the relevant part of the response (after the prompt)
+            response_text = response.split("English:")[-1].strip()
+            return response_text
 
     except Exception as e:
         st.error(f"Translation error: {str(e)}")
-        return text
+        return text
+
+
 
 def init_langchain_llm(model_choice):
     try:
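Note on the new ChatOpenAI branch above: LangChain chat models accept a list of role/content dicts via invoke() and normally return an AIMessage whose text lives in .content; the hasattr/isinstance ladder only covers wrappers that hand back a bare string instead. A minimal sketch of that call shape (the import path and placeholder key are illustrative, not taken from app.py):

    # Sketch only: assumes a recent LangChain; older releases import
    # ChatOpenAI from langchain.chat_models instead.
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="gpt-4", openai_api_key="sk-...", temperature=0.0)
    messages = [
        {"role": "system", "content": "You are a translator."},
        {"role": "user", "content": "Translate this Russian text to English: Привет"},
    ]
    response = llm.invoke(messages)   # returns an AIMessage
    print(response.content.strip())   # the translated text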
@@ -190,22 +191,19 @@ def init_langchain_llm(model_choice):
             st.stop()
 
         return ChatOpenAI(
-            model="gpt-
+            model="gpt-4",
             openai_api_key=st.secrets['openai_key'],
             temperature=0.0
         )
 
-        else: #
-
-
-
-
-
-                base_url="https://integrate.api.nvidia.com/v1",
-                model="nvidia/llama-3.1-nemotron-70b-instruct",
-                openai_api_key=st.secrets['nvapi'],
-                temperature=0.0
+        else: # Qwen model
+            # Initialize Qwen pipeline
+            pipe = pipeline(
+                "text-generation",
+                model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8",
+                device_map="auto"
             )
+            return pipe
 
     except Exception as e:
         st.error(f"Error initializing the LLM: {str(e)}")
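Note on the Qwen branch above: init_langchain_llm now returns a raw transformers pipeline rather than a LangChain chat model, which is exactly what the isinstance(llm, ChatOpenAI) check in translate_text dispatches on. One hedge worth knowing: when a text-generation pipeline is given chat-style messages, some transformers versions return generated_text as the full message list rather than a flat string, in which case the split("English:") parsing above would need adjusting. A minimal sketch of the call shape (assumes transformers with GPTQ support, e.g. optimum/auto-gptq, and enough memory for the Int8 weights):

    from transformers import pipeline

    pipe = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8",
        device_map="auto",
    )
    out = pipe(
        [{"role": "user", "content": "Translate this Russian text to English: Привет"}],
        max_length=512,          # as in the committed code; max_new_tokens is the newer knob
        num_return_sequences=1,
    )
    # generated_text may be a plain string or a list of {"role", "content"} dicts,
    # depending on the transformers version.
    print(out[0]["generated_text"])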
@@ -476,12 +474,12 @@ def create_output_file(df, uploaded_file, llm):
 
 def main():
     with st.sidebar:
-        st.title("::: AI-анализ мониторинга новостей (v.3.
+        st.title("::: AI-анализ мониторинга новостей (v.3.21):::")
         st.subheader("по материалам СКАН-ИНТЕРФАКС ")
 
         model_choice = st.radio(
             "Выберите модель для анализа:",
-            ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "
+            ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen 2.5-7B (GPTQ-Int8)"],
             key="model_selector"
         )
 
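Taken together, the three hunks keep the UI, the factory, and the translator consistent: the sidebar radio picks a backend, init_langchain_llm returns either a ChatOpenAI client or a transformers pipeline, and translate_text dispatches on that type, falling back to the untranslated text on any error. Illustrative wiring, with the surrounding Streamlit flow assumed rather than shown in this diff:

    model_choice = st.radio(
        "Выберите модель для анализа:",
        ["Groq (llama-3.1-70b)", "ChatGPT-4-mini", "Qwen 2.5-7B (GPTQ-Int8)"],
        key="model_selector",
    )  # as in main()
    llm = init_langchain_llm(model_choice)    # ChatOpenAI client or pipeline
    english = translate_text(llm, "Компания отчиталась о росте выручки")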