Tesneem committed · verified
Commit da5e58a · 1 Parent(s): a747844

Update app.py

Files changed (1): app.py (+26 -9)
app.py CHANGED
@@ -8,6 +8,7 @@
 # app.py
 import os
 import re
+from huggingface_hub import InferenceClient
 import json
 from huggingface_hub import HfApi
 import streamlit as st
@@ -130,17 +131,33 @@ def init_vector_search() -> MongoDBAtlasVectorSearch:
 # return headers + questions
 def extract_with_llm(text: str) -> List[str]:
     client = InferenceClient(api_key=HF_TOKEN.strip())
-    prompt = (
-        "Extract a list of grant application headers and questions from the following text. "
-        "Include section titles, prompts, or any questions that ask for a response. "
-        "Return them as a numbered list.\n\n"
-        f"{text[:3000]}"  # Optional: limit input to avoid token overflow
-    )
     try:
-        response = client.text_generation(prompt=prompt, max_new_tokens=500)
-        return [line.strip("-•1234567890. ") for line in response.split("\n") if line.strip()]
+        response = client.chat.completions.create(
+            model="mistralai/Mistral-Nemo-Instruct-2407",  # or "HuggingFaceH4/zephyr-7b-beta"
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are an assistant helping extract questions and headers from grant applications.",
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        "Please extract all the grant application headers and questions from the following text. "
+                        "Include section titles, prompts, and any question-like content. Return them as a numbered list.\n\n"
+                        f"{text[:3000]}"
+                    ),
+                },
+            ],
+            temperature=0.2,
+            max_tokens=512,
+        )
+        return [
+            line.strip("•-1234567890. ").strip()
+            for line in response.choices[0].message.content.strip().split("\n")
+            if line.strip()
+        ]
     except Exception as e:
-        st.error("❌ Failed to extract questions with LLM")
+        st.error("❌ LLM extraction failed")
         st.error(str(e))
         return []
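
For reference, a minimal standalone sketch of the new chat-completions path, runnable outside Streamlit. It assumes a recent huggingface_hub that exposes the OpenAI-style client.chat.completions.create interface used in the commit, that HF_TOKEN is exported in the environment, and that the pinned model is reachable via the Inference API; the sample text is invented for illustration.

# sketch: exercises the same call and list-cleanup logic as the new extract_with_llm
import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"].strip())  # assumes HF_TOKEN is set

sample = (
    "Section 1: Organization Background\n"
    "What is your organization's mission?\n"
    "Section 2: Project Description\n"
    "Describe the project and its expected outcomes."
)

response = client.chat.completions.create(
    model="mistralai/Mistral-Nemo-Instruct-2407",  # same model the commit pins
    messages=[
        {"role": "system", "content": "You are an assistant helping extract questions and headers from grant applications."},
        {"role": "user", "content": f"Extract all headers and questions as a numbered list.\n\n{sample}"},
    ],
    temperature=0.2,
    max_tokens=512,
)

# Same cleanup as the committed code: strip bullets/numbering, drop empty lines.
extracted = [
    line.strip("•-1234567890. ").strip()
    for line in response.choices[0].message.content.strip().split("\n")
    if line.strip()
]
print(extracted)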