# ── app.py ──────────────────────────────────────────────────────────
import os, logging, textwrap
import gradio as gr
from transformers import pipeline, AutoTokenizer
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
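# Expected dependencies (not pinned here): gradio, transformers, torch,
# langchain, langchain-community, langchain-huggingface, sentence-transformers,
# faiss-cpu, pypdf.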
# ── CONFIG ──────────────────────────────────────────────────────────
DOCS_DIR = "document"
INDEX_DIR = "faiss_index"
EMB_MODEL = "KBLab/sentence-bert-swedish-cased"
#LLM_MODEL = "tiiuae/falcon-rw-1b" # DΓ₯lig
#LLM_MODEL = "google/flan-t5-base" # DΓ₯lig
#LLM_MODEL = "bigscience/bloom-560m" # DΓ₯lig
#LLM_MODEL = "NbAiLab/nb-gpt-j-6B" #- Restricted
#LLM_MODEL = "datificate/gpt2-small-swedish" # Finns ej pΓ₯ Hugging face
LLM_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
CHUNK_SIZE = 400
CHUNK_OVERLAP = 40
CTX_TOK_MAX = 750 # sparar marginal till frΓ₯ga + svar
MAX_NEW_TOKENS = 128
K = 3
DEFAULT_TEMP = 0.3
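# NOTE: TinyLlama-1.1B-Chat has a 2048-token context window, so
# CTX_TOK_MAX + the question + MAX_NEW_TOKENS must stay within that budget.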
# ── LOGGING ─────────────────────────────────────────────────────────
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)
# ── 1) Index (build or load) ────────────────────────────────────────
emb = HuggingFaceEmbeddings(model_name=EMB_MODEL)
if os.path.isdir(INDEX_DIR):
    log.info(f"Loading index from {INDEX_DIR}")
    # allow_dangerous_deserialization is required by recent langchain_community
    # releases to unpickle a locally built index we trust
    vs = FAISS.load_local(INDEX_DIR, emb, allow_dangerous_deserialization=True)
else:
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    docs, pdfs = [], []
    for fn in os.listdir(DOCS_DIR):
        if fn.lower().endswith(".pdf"):
            chunks = splitter.split_documents(PyPDFLoader(os.path.join(DOCS_DIR, fn)).load())
            for c in chunks:
                c.metadata["source"] = fn
            docs.extend(chunks); pdfs.append(fn)
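    # Fail fast on an empty corpus; FAISS.from_documents gives a less
    # helpful error when handed an empty list.
    if not docs:
        raise SystemExit(f"No PDF files found in '{DOCS_DIR}', nothing to index.")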
    vs = FAISS.from_documents(docs, emb); vs.save_local(INDEX_DIR)
    log.info(f"Built index: {len(pdfs)} PDFs / {len(docs)} chunks")
retriever = vs.as_retriever(search_kwargs={"k": K})
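# This default retriever backs the retrieval test box below; chat_fn builds
# its own retriever per request so the UI's K slider takes effect.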
# ── 2) LLM pipeline & tokenizer ─────────────────────────────────────
log.info("π Initierar LLM β¦")
gen_pipe = pipeline("text-generation", model=LLM_MODEL, device=-1, max_new_tokens=MAX_NEW_TOKENS)
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
log.info("β
LLM klar")
# ── 3) Helper functions ─────────────────────────────────────────────
def build_prompt(query: str, docs, ctx_tok_max: int = CTX_TOK_MAX):
    """
    Packs as many chunks as fit within ctx_tok_max tokens.
    """
    context_parts = []
    total_ctx_tok = 0
    for d in docs:
        tok_len = len(tokenizer.encode(d.page_content))
        if total_ctx_tok + tok_len > int(ctx_tok_max):
            break  # stop before the token budget is exceeded
        context_parts.append(d.page_content)
        total_ctx_tok += tok_len
    context = "\n\n---\n\n".join(context_parts)
    return textwrap.dedent(f"""\
Du är en hjälpsam assistent som svarar på svenska.

Kontext (hämtat ur PDF-dokument):
{context}

Fråga: {query}

Svar (svenska):""").strip()
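# Note: chunks are packed whole, never truncated, so any single chunk longer
# than ctx_tok_max yields an empty context; with CHUNK_SIZE=400 characters
# that should not occur at the default CTX_TOK_MAX of 750 tokens.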
def test_retrieval(q):  # quick retrieval test without the LLM
    docs = retriever.invoke(q)
    return "\n\n".join([f"{i+1}. ({d.metadata['source']}) {d.page_content[:160]}…" for i, d in enumerate(docs)]) or "Inga träffar"
def chat_fn(q, temp, max_new_tokens, k, ctx_tok_max, history):
    history = history or []
    history.append({"role": "user", "content": q})
    # Create a fresh retriever with the k chosen in the UI
    retriever = vs.as_retriever(search_kwargs={"k": int(k)})
    docs = retriever.invoke(q)
    if not docs:
        history.append({"role": "assistant", "content": "Hittade inget relevant."})
        return history, history
    # Show which chunks are used; Gradio's messages format only renders
    # "user"/"assistant" roles, so this goes out as an assistant message.
    chunk_info = "\n\n".join([f"{i+1}. ({d.metadata['source']}) {d.page_content[:160]}…" for i, d in enumerate(docs)])
    history.append({"role": "assistant", "content": f"Chunkar som används:\n{chunk_info}"})
    # Build the prompt with the ctx_tok_max chosen in the UI
    prompt = build_prompt(q, docs, ctx_tok_max)
    log.info(f"Prompt tokens={len(tokenizer.encode(prompt))} temp={temp} max_new_tokens={max_new_tokens} k={k} ctx_tok_max={ctx_tok_max}")
    try:
        ans = gen_pipe(
            prompt,
            temperature=float(temp),
            max_new_tokens=int(max_new_tokens),
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=True,
            return_full_text=False,  # return only the completion, not the prompt
        )[0]["generated_text"]
    except Exception as e:
        log.exception("Generation error")
        ans = f"Fel vid generering: {type(e).__name__}: {e}\n\nPrompt:\n{prompt}"
    src_hint = docs[0].metadata["source"] if docs else "Ingen källa"
    history.append({"role": "assistant", "content": f"**(Källa: {src_hint})**\n\n{ans}"})
    return history, history
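# chat_fn returns the message list twice: once for the Chatbot display and
# once for the State that carries history into the next call.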
# ── 4) Gradio UI ────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("# Svensk RAG-chat")
    gr.Markdown(f"**PDF-filer:** {', '.join(os.listdir(DOCS_DIR)) or '–'}")
    gr.Markdown(f"**LLM-modell som används:** `{LLM_MODEL}`", elem_id="llm-info")
    with gr.Row():
        q_test = gr.Textbox(label="Test-Retrieval")
        b_test = gr.Button("Testa")
        o_test = gr.Textbox(label="Chunkar")
    with gr.Row():
        q_in = gr.Textbox(label="Fråga", placeholder="Ex: Vad är förvaltningsöverlämnande?")
        temp = gr.Slider(0, 1, value=DEFAULT_TEMP, step=0.05, label="Temperatur")
        max_new_tokens = gr.Slider(32, 256, value=MAX_NEW_TOKENS, step=8, label="Max svarslängd (tokens)")
        k = gr.Slider(1, 10, value=K, step=1, label="Antal chunkar (K)")
        ctx_tok_max = gr.Slider(100, 2000, value=CTX_TOK_MAX, step=50, label="Max kontexttokens")
    b_send = gr.Button("Skicka")
    chat = gr.Chatbot(type="messages", label="Chat")
    chat_hist = gr.State([])
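    # type="messages" expects a list of {"role": ..., "content": ...} dicts,
    # which is what chat_fn appends to history.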
    b_test.click(test_retrieval, inputs=[q_test], outputs=[o_test])
    b_send.click(chat_fn, inputs=[q_in, temp, max_new_tokens, k, ctx_tok_max, chat_hist], outputs=[chat, chat_hist])
if __name__ == "__main__":
    demo.launch(share=True)  # remove share=True if you want to keep the app private