tomas.helmfridsson committed on
Commit 3b838f7 · 1 Parent(s): 405b739
Files changed (1)
  1. app.py +21 -35
app.py CHANGED
@@ -8,7 +8,7 @@ from langchain_huggingface.llms import HuggingFacePipeline
 from langchain.chains import RetrievalQA
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 
-# ── 1) Load & chunk PDFs ───────────────────────────────────
+# ── 1) Load & split PDFs ──────────────────────────────────
 all_docs, files = [], []
 splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=30)
 
@@ -16,84 +16,71 @@ for fn in os.listdir("document"):
     if fn.lower().endswith(".pdf"):
         path = os.path.join("document", fn)
         loader = PyPDFLoader(path)
-        pages = loader.load()                     # a list of Document objects
-        chunks = splitter.split_documents(pages)  # split into shorter chunks
+        pages = loader.load()
+        chunks = splitter.split_documents(pages)
         all_docs.extend(chunks)
         files.append(fn)
 
-# ── 2) Create embeddings + FAISS ──────────────────────────────
+# ── 2) Build FAISS with Swedish embeddings ────────────────────
 emb = HuggingFaceEmbeddings(model_name="KBLab/sentence-bert-swedish-cased")
 vs = FAISS.from_documents(all_docs, emb)
 
-# ── 3) Initialize LLM & RetrievalQA ───────────────────────────
+# ── 3) Initialize CPU pipeline for Falcon-1B ──────────────────
 pipe = pipeline(
     "text-generation",
     model="tiiuae/falcon-rw-1b",
-    device=-1,         # CPU-only on the free Space
-    max_new_tokens=64  # shorter answers for faster runs
+    device=-1,
+    max_new_tokens=64
 )
 llm = HuggingFacePipeline(
     pipeline=pipe,
-    model_kwargs={"temperature": 0.3},
-    streaming=True  # enable live streaming of answers
+    model_kwargs={"temperature": 0.3}
 )
 
-# The retriever fetches only one chunk, for maximum speed
 retriever = vs.as_retriever(search_kwargs={"k": 1})
-
 qa = RetrievalQA.from_chain_type(
     llm=llm,
     retriever=retriever,
     chain_type="stuff"
 )
 
-# ── 4) Chat function in “messages” format ─────────────────────
+# ── 4) Chat function in “messages” format ────────────────────
 def chat_fn(message, temperature, history):
     history = history or []
     if not message.strip():
-        history.append({"role": "assistant", "content": "⚠️ Du måste skriva en fråga."})
+        history.append({"role":"assistant","content":"⚠️ Du måste skriva en fråga."})
         return history
 
-    history.append({"role": "user", "content": message})
+    history.append({"role":"user","content":message})
 
     if len(message) > 1000:
         history.append({
-            "role": "assistant",
-            "content": f"⚠️ Frågan är för lång ({len(message)} tecken)."
+            "role":"assistant",
+            "content":f"⚠️ Frågan är för lång ({len(message)} tecken)."
         })
         return history
 
     llm.model_kwargs["temperature"] = temperature
-
     try:
-        svar = qa.invoke({"query": message})["result"]
+        svar = qa.invoke({"query":message})["result"]
     except Exception as e:
         svar = f"❌ Ett fel uppstod: {e}"
 
-    history.append({"role": "assistant", "content": svar})
+    history.append({"role":"assistant","content":svar})
     return history
 
-# ── 5) Build Gradio UI & publish ──────────────────────────────
+# ── 5) Gradio UI & public link ────────────────────────────────
 with gr.Blocks() as demo:
     gr.Markdown("## 🌟 Dokumentassistent (Svenska)")
-    gr.Markdown(
-        "**✅ Laddade PDF-filer:**\n\n" +
-        "\n".join(f"- {f}" for f in files)
-    )
+    gr.Markdown("**✅ Laddade PDF-filer:**\n\n" + "\n".join(f"- {f}" for f in files))
 
     with gr.Row():
-        txt = gr.Textbox(
-            lines=2,
-            label="Din fråga:",
-            placeholder="Exempel: Vad anges förberedelser inför möte?"
-        )
-        temp = gr.Slider(
-            0.0, 1.0, value=0.3, step=0.05,
-            label="Temperatur"
-        )
+        txt = gr.Textbox(lines=2, label="Din fråga:",
+                         placeholder="Ex: Vad anges för krav?")
+        temp = gr.Slider(0.0,1.0,value=0.3,step=0.05,label="Temperatur")
         send = gr.Button("Skicka")
 
-    chatbot = gr.Chatbot(value=[], type="messages", streaming=True)
+    chatbot = gr.Chatbot(value=[], type="messages")
     chat_state = gr.State([])
 
     send.click(
@@ -103,5 +90,4 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    # share=True gives a public link to your Space
     demo.launch(share=True)
 
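The retrieval pipeline itself is untouched by this commit: PDFs are split into 250-character chunks, embedded with KBLab/sentence-bert-swedish-cased, indexed in FAISS, and answered through a stuff-type RetrievalQA chain that retrieves a single chunk. Below is a minimal self-contained sketch of that pattern; the import paths for PyPDFLoader, FAISS, and HuggingFaceEmbeddings are assumptions (the hunks start below app.py's import block), and the PDF path and query are placeholders.

from transformers import pipeline
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader  # assumed import path
from langchain_community.vectorstores import FAISS            # assumed import path
from langchain_huggingface import HuggingFaceEmbeddings       # assumed import path
from langchain_huggingface.llms import HuggingFacePipeline

# Load one PDF and split it into short, overlapping chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=30)
pages = PyPDFLoader("document/exempel.pdf").load()  # placeholder file name
chunks = splitter.split_documents(pages)

# Embed the chunks with the Swedish sentence-BERT model and index them in FAISS.
emb = HuggingFaceEmbeddings(model_name="KBLab/sentence-bert-swedish-cased")
vs = FAISS.from_documents(chunks, emb)

# Wrap the CPU-only Falcon pipeline as a LangChain LLM.
llm = HuggingFacePipeline(
    pipeline=pipeline("text-generation", model="tiiuae/falcon-rw-1b",
                      device=-1, max_new_tokens=64)
)

# "stuff" pastes the retrieved chunk straight into the prompt; k=1 keeps it fast.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vs.as_retriever(search_kwargs={"k": 1}),
    chain_type="stuff",
)
print(qa.invoke({"query": "Vad står det i dokumentet?"})["result"])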
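On the UI side, chat_fn follows the contract of gr.Chatbot(type="messages"): history is a plain list of {"role": ..., "content": ...} dicts handed back to the widget. The sketch below shows that contract in isolation; the echo handler is a hypothetical stand-in for the QA chain, and because the commit's actual send.click(...) arguments fall outside the hunks, the wiring shown is only an assumption.

import gradio as gr

def chat_fn(message, history):
    # Hypothetical echo handler; the real app calls the RetrievalQA chain here.
    history = history or []
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": f"Eko: {message}"})
    # Return the list twice: once for the visible widget, once for the state.
    return history, history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], type="messages")  # expects role/content dicts
    chat_state = gr.State([])
    txt = gr.Textbox(lines=2, label="Din fråga:")
    send = gr.Button("Skicka")
    # Assumed wiring: the commit's real send.click arguments are elided above.
    send.click(chat_fn, inputs=[txt, chat_state], outputs=[chatbot, chat_state])

if __name__ == "__main__":
    demo.launch()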