ajnx014 committed on
Commit
80344cf
·
verified ·
1 Parent(s): e6cd443

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -64
app.py CHANGED
@@ -9,13 +9,10 @@ from langchain_community.vectorstores import FAISS
9
  from langchain.chains import RetrievalQA
10
  from langchain.chat_models import ChatOpenAI
11
 
12
- # ✅ Read OpenRouter API key from Hugging Face secret
13
  OPENROUTER_API_KEY = os.environ.get("ArjunHF")
14
 
15
  class OpenRouterChatModel(ChatOpenAI):
16
- """
17
- A wrapper around ChatOpenAI to use OpenRouter with a Mistral 3.2 model.
18
- """
19
  def __init__(self, **kwargs):
20
  super().__init__(
21
  openai_api_base="https://openrouter.ai/api/v1",
@@ -24,60 +21,52 @@ class OpenRouterChatModel(ChatOpenAI):
24
  **kwargs
25
  )
26
 
27
- def clean_math(text: str) -> str:
 
 
 
 
28
  """
29
- Cleans and standardizes LaTeX/math formulas in LLM outputs for Gradio Markdown.
30
-
31
- Features:
32
- - Converts `\( ... \)` → `$ ... $` for inline math.
33
- - Converts `\[ ... \]` → `$$ ... $$` for display math.
34
- - Replaces malformed `-$...$` → `$...$`.
35
- - Removes trailing backslashes in both `$...$` and `$$...$$`.
36
- - Removes unnecessary spaces inside inline or display math.
37
- - Cleans math inside lists, paragraphs, or any text context.
38
  """
39
- if not text:
40
- return text
41
-
42
- # Convert \( ... \) -> $ ... $
43
- text = re.sub(r"\\\\\(([\s\S]+?)\\\\\)", r"$\1$", text)
44
-
45
- # Convert \[ ... \] -> $$ ... $$
46
  text = re.sub(r"\\\\\[([\s\S]+?)\\\\\]", r"$$\1$$", text)
47
-
48
- # Replace -$ ... $ with $ ... $
49
- text = re.sub(r"-\$(.+?)\$", r"$\1$", text)
50
-
51
- # Remove trailing backslashes before $ or $$
52
- text = re.sub(r"\\\$\$", "$$", text)
53
- text = re.sub(r"\\\$", "$", text)
54
 
55
- # Remove extra spaces inside inline and display math
56
- text = re.sub(r"\$\$\s+([^\$]+?)\s+\$\$", r"$$\1$$", text)
57
- text = re.sub(r"\$\s+([^\$]+?)\s+\$", r"$\1$", text)
58
 
59
- # Clean math in lists or paragraphs, e.g., "- $ y_i $" -> "- $y_i$"
60
- text = re.sub(r"(\s)-\$\s*([^\$]+?)\s*\$", r"\1$\2$", text)
61
- text = re.sub(r"(\s)\$\s*([^\$]+?)\s*\$", r"\1$\2$", text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  return text
64
 
65
- def qa_on_url(url: str, question: str) -> str:
66
- """
67
- Performs a question-answer retrieval on the content of a given webpage URL.
68
-
69
- Steps:
70
- 1. Loads the webpage content.
71
- 2. Splits the text into manageable chunks.
72
- 3. Embeds chunks using HuggingFace embeddings.
73
- 4. Builds a FAISS vector store for semantic search.
74
- 5. Uses OpenRouter Mistral 3.2 LLM to answer the query.
75
- 6. Cleans and formats all LaTeX/math formulas for Gradio Markdown.
76
- """
77
  try:
78
  loader = WebBaseLoader(url)
79
  docs = loader.load()
80
-
81
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
82
  split_docs = splitter.split_documents(docs)
83
 
@@ -89,30 +78,20 @@ def qa_on_url(url: str, question: str) -> str:
89
  qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
90
  raw_answer = qa_chain.run(question)
91
 
92
- # Clean all math formulas for proper rendering in Gradio Markdown
93
- formatted_answer = clean_math(raw_answer)
94
  return formatted_answer
95
 
96
  except Exception as e:
97
  return f"❌ Error: {e}"
98
 
99
- # --- Gradio Interface ---
100
  iface = gr.Interface(
101
  fn=qa_on_url,
102
- inputs=[
103
- gr.Textbox(label="Enter Web URL", placeholder="https://example.com"),
104
- gr.Textbox(label="Your Question", placeholder="Ask anything about the webpage")
105
- ],
106
- outputs=gr.Markdown(), # ✅ Allows LaTeX/math rendering
107
- title="🔎 Ask Questions About Any Webpage",
108
- description=(
109
- "Use this tool to ask questions about any webpage content. "
110
- "It fetches the page, creates semantic embeddings, and uses a "
111
- "Mistral 3.2 LLM via OpenRouter to answer your question. "
112
- "Inline and display math formulas are automatically cleaned for proper rendering. May crash sometimes."
113
- "⚠️ Depending on page length and LLM response, this may take 10–20 seconds."
114
- )
115
  )
116
 
117
  if __name__ == "__main__":
118
- iface.launch()
 
9
  from langchain.chains import RetrievalQA
10
  from langchain.chat_models import ChatOpenAI
11
 
12
+ # ✅ Read OpenRouter API key from HF secret
13
  OPENROUTER_API_KEY = os.environ.get("ArjunHF")
14
 
15
  class OpenRouterChatModel(ChatOpenAI):
 
 
 
16
  def __init__(self, **kwargs):
17
  super().__init__(
18
  openai_api_base="https://openrouter.ai/api/v1",
 
21
  **kwargs
22
  )
23
 
24
+ import re
25
+
26
+ import re
27
+
28
# An already-delimited math span: display ``$$...$$`` or single-line inline ``$...$``.
# The single capturing group lets ``split`` keep the spans in its result.
_MATH_SPAN = re.compile(r"(\$\$[^$]+\$\$|\$[^$\n]+\$)")
# Characters whose presence marks bracketed/parenthesised text as a formula.
_MATH_HINT = re.compile(r"[\\^_{}=]")
# ``[ ... ]`` that is NOT followed by "(" or "[" (that would be a Markdown link).
_BARE_BRACKETS = re.compile(r"\[([^\[\]]+)\](?![(\[])")
# ``( ... )`` that is NOT preceded by "]" (that would be a Markdown link target).
_BARE_PARENS = re.compile(r"(?<!\])\(([^()]+)\)")


def _sub_outside_math(pattern, repl, text):
    """Apply ``pattern.sub(repl, ...)`` only to the parts of *text* outside math spans."""
    pieces = _MATH_SPAN.split(text)
    # Even indices are plain text; odd indices are the captured math spans.
    pieces[::2] = [pattern.sub(repl, plain) for plain in pieces[::2]]
    return "".join(pieces)


def _wrap_if_math(delimiter):
    """Build a replacer that wraps group 1 in *delimiter* when it looks like math."""
    def replacer(match):
        content = match.group(1)
        if _MATH_HINT.search(content):
            return f"{delimiter}{content}{delimiter}"
        return match.group(0)
    return replacer


def _trim_math_span(match):
    """Strip the padding of a math span that has whitespace on both sides."""
    span = match.group(0)
    delimiter = "$$" if span.startswith("$$") else "$"
    inner = span[len(delimiter):-len(delimiter)]
    stripped = inner.strip()
    if stripped and inner[0].isspace() and inner[-1].isspace():
        return f"{delimiter}{stripped}{delimiter}"
    return span


def format_math(text):
    """
    Convert LaTeX/math patterns in *text* to ``$``-delimited Markdown math.

    Steps, in order:

    1. ``\\[ ... \\]`` becomes ``$$ ... $$`` and ``\\( ... \\)`` becomes
       ``$ ... $``. One or more backslashes are accepted before the bracket,
       so both normal and doubled-backslash delimiters are handled.
    2. A backslash directly before ``$`` or ``$$`` is removed.
    3. Bare ``[ ... ]`` / ``( ... )`` whose content contains one of
       ``\\ ^ _ { } =`` are wrapped as display / inline math. Existing math
       spans and Markdown links (``[label](target)``) are left untouched.
    4. Whitespace padding inside each math span is removed. Spans are matched
       as delimiter pairs, so the text between two formulas is never
       mistaken for a formula.

    Args:
        text: Text to clean up; may be empty or ``None``.

    Returns:
        The cleaned text. Falsy input is returned unchanged.
    """
    if not text:
        return text

    # 1. Explicit LaTeX delimiters -> dollar delimiters.
    text = re.sub(r"\\+\[([\s\S]+?)\\+\]", r"$$\1$$", text)
    text = re.sub(r"\\+\(([\s\S]+?)\\+\)", r"$\1$", text)

    # 2. Drop a stray backslash sitting directly in front of $$ or $.
    text = re.sub(r"\\\$\$", "$$", text)
    text = re.sub(r"\\\$", "$", text)

    # 3. Heuristic wrapping, applied only outside existing math spans. The
    #    second pass re-splits the text so that spans created by the bracket
    #    pass are protected from the parenthesis pass.
    text = _sub_outside_math(_BARE_BRACKETS, _wrap_if_math("$$"), text)
    text = _sub_outside_math(_BARE_PARENS, _wrap_if_math("$"), text)

    # 4. Remove padding inside properly paired math spans.
    return _MATH_SPAN.sub(_trim_math_span, text)
64
 
65
+
66
+ def qa_on_url(url, question):
 
 
 
 
 
 
 
 
 
 
67
  try:
68
  loader = WebBaseLoader(url)
69
  docs = loader.load()
 
70
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
71
  split_docs = splitter.split_documents(docs)
72
 
 
78
  qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
79
  raw_answer = qa_chain.run(question)
80
 
81
+ # Format all math formulas nicely
82
+ formatted_answer = format_math(raw_answer)
83
  return formatted_answer
84
 
85
  except Exception as e:
86
  return f"❌ Error: {e}"
87
 
 
88
  iface = gr.Interface(
89
  fn=qa_on_url,
90
+ inputs=[gr.Textbox(label="Enter Web URL"), gr.Textbox(label="Your Question")],
91
+ outputs=gr.Markdown(), # Markdown allows LaTeX rendering
92
+ title="🔎 Ask Questions About Any Webpage (Mistral 3.2 via OpenRouter + LangChain)",
93
+ description="⚠️ This may take 10–20 seconds depending on the page length and LLM response time. Please be patient!"
 
 
 
 
 
 
 
 
 
94
  )
95
 
96
  if __name__ == "__main__":
97
+ iface.launch()