tomas.helmfridsson committed on
Commit ad7b39c · 1 Parent(s): 3b838f7
Files changed (1):
  1. app.py +35 -22
app.py CHANGED
@@ -8,35 +8,36 @@ from langchain_huggingface.llms import HuggingFacePipeline
 from langchain.chains import RetrievalQA
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 
-# ── 1) Load & split PDFs ─────────────────────────────────────
+# ── 1) Load PDFs and split them into short chunks ────────────
 all_docs, files = [], []
 splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=30)
 
 for fn in os.listdir("document"):
     if fn.lower().endswith(".pdf"):
-        path = os.path.join("document", fn)
+        path = os.path.join("document", fn)
         loader = PyPDFLoader(path)
-        pages = loader.load()
-        chunks = splitter.split_documents(pages)
+        pages = loader.load()                      # a list of Document objects
+        chunks = splitter.split_documents(pages)   # split into smaller pieces
         all_docs.extend(chunks)
         files.append(fn)
 
-# ── 2) Build FAISS with Swedish embeddings ───────────────────
+# ── 2) Create vectors with Swedish embeddings ────────────────
 emb = HuggingFaceEmbeddings(model_name="KBLab/sentence-bert-swedish-cased")
 vs = FAISS.from_documents(all_docs, emb)
 
-# ── 3) Initialize CPU pipeline for Falcon-1B ─────────────────
+# ── 3) Initialize the LLM pipeline (CPU-only) ────────────────
 pipe = pipeline(
     "text-generation",
     model="tiiuae/falcon-rw-1b",
-    device=-1,
-    max_new_tokens=64
+    device=-1,          # CPU
+    max_new_tokens=64   # shorter answers → faster
 )
 llm = HuggingFacePipeline(
     pipeline=pipe,
     model_kwargs={"temperature": 0.3}
 )
 
+# ── 4) Build RetrievalQA with just 1 chunk ───────────────────
 retriever = vs.as_retriever(search_kwargs={"k": 1})
 qa = RetrievalQA.from_chain_type(
     llm=llm,
@@ -44,40 +45,51 @@ qa = RetrievalQA.from_chain_type(
     chain_type="stuff"
 )
 
-# ── 4) Chat function in "messages" format ────────────────────
+# ── 5) Chat function that returns both history & state ───────
 def chat_fn(message, temperature, history):
     history = history or []
     if not message.strip():
-        history.append({"role":"assistant","content":"⚠️ Du måste skriva en fråga."})
-        return history
+        history.append({"role": "assistant", "content": "⚠️ Du måste skriva en fråga."})
+        return history, history
 
-    history.append({"role":"user","content":message})
+    # Add the user's question
+    history.append({"role": "user", "content": message})
 
+    # Reject overly long questions
     if len(message) > 1000:
         history.append({
-            "role":"assistant",
-            "content":f"⚠️ Frågan är för lång ({len(message)} tecken)."
+            "role": "assistant",
+            "content": f"⚠️ Frågan är för lång ({len(message)} tecken)."
         })
-        return history
+        return history, history
 
+    # Adjust the temperature
     llm.model_kwargs["temperature"] = temperature
+
+    # Run the RAG chain
     try:
-        svar = qa.invoke({"query":message})["result"]
+        svar = qa.invoke({"query": message})["result"]
     except Exception as e:
         svar = f"❌ Ett fel uppstod: {e}"
 
-    history.append({"role":"assistant","content":svar})
-    return history
+    history.append({"role": "assistant", "content": svar})
+    return history, history
 
-# ── 5) Gradio UI & public link ───────────────────────────────
+# ── 6) Build the Gradio UI & publish ─────────────────────────
 with gr.Blocks() as demo:
     gr.Markdown("## 🌟 Dokumentassistent (Svenska)")
     gr.Markdown("**✅ Laddade PDF-filer:**\n\n" + "\n".join(f"- {f}" for f in files))
 
     with gr.Row():
-        txt = gr.Textbox(lines=2, label="Din fråga:",
-                         placeholder="Ex: Vad anges för krav?")
-        temp = gr.Slider(0.0,1.0,value=0.3,step=0.05,label="Temperatur")
+        txt = gr.Textbox(
+            lines=2,
+            label="Din fråga:",
+            placeholder="Exempel: Vad anges förberedelser inför möte?"
+        )
+        temp = gr.Slider(
+            0.0, 1.0, value=0.3, step=0.05,
+            label="Temperatur"
+        )
     send = gr.Button("Skicka")
 
     chatbot = gr.Chatbot(value=[], type="messages")
@@ -90,4 +102,5 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
+    # share=True gives a public link to your Space
    demo.launch(share=True)
 
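Both hunks start at line 8, so the import block at the top of app.py never appears in this diff. For anyone reproducing the Space, it plausibly looks like the sketch below: the last three imports are confirmed by the hunk context, while the rest (os, gradio, transformers, and the langchain_community paths for PyPDFLoader and FAISS) are assumptions, since those classes have moved between packages across langchain releases.

```python
# Assumed import block (not shown in the diff); module paths are a best
# guess and vary by langchain version.
import os

import gradio as gr
from transformers import pipeline

from langchain_community.document_loaders import PyPDFLoader   # assumed path
from langchain_community.vectorstores import FAISS              # assumed path
from langchain_huggingface import HuggingFaceEmbeddings         # assumed path
from langchain_huggingface.llms import HuggingFacePipeline      # from the diff
from langchain.chains import RetrievalQA                        # from the diff
from langchain.text_splitter import RecursiveCharacterTextSplitter  # from the diff
```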
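One caveat on step 3: in transformers text-generation pipelines, temperature only takes effect when sampling is enabled, and greedy decoding (the default) ignores it. So the slider value written into llm.model_kwargs may have no visible effect on answers. A minimal sketch of the adjusted pipeline call, keeping everything else from the diff:

```python
pipe = pipeline(
    "text-generation",
    model="tiiuae/falcon-rw-1b",
    device=-1,            # CPU
    max_new_tokens=64,    # shorter answers → faster
    do_sample=True,       # without this, temperature is silently ignored
    temperature=0.3,
)
```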
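Step 4 combines chunk_size=250 with k=1, so every answer is grounded in a single chunk of at most roughly 250 characters. Whether that is enough context is easy to check by querying the retriever directly; a sketch using the objects defined above, with a made-up test question:

```python
# Inspect what the retriever would hand to the LLM for a given query.
docs = retriever.invoke("Vad anges för krav?")   # hypothetical test query
for d in docs:
    print(d.metadata.get("source"), "→", d.page_content)

# The same round-trip the app performs:
print(qa.invoke({"query": "Vad anges för krav?"})["result"])
```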
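Finally, the send.click(...) wiring sits in the elided region between the last two hunks, but the new return history, history signature implies two outputs: one for the gr.Chatbot and one for a gr.State that carries the conversation between turns. A hypothetical reconstruction, where only the name state is invented; txt, temp, send, and chatbot come from the visible code:

```python
state = gr.State([])             # hypothetical per-session conversation store

send.click(
    fn=chat_fn,
    inputs=[txt, temp, state],   # message, temperature, history
    outputs=[chatbot, state],    # first copy renders, second persists
)
```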