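"""Chatbot Prawny: a Streamlit chat app that answers questions about Polish
legal codes via semantic search over preprocessed text chunks.

Run with `streamlit run <this_file>.py` (filename assumed); expects
processed_kodeksy.json in the working directory.
"""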
import streamlit as st
import json
import os
from sentence_transformers import SentenceTransformer, util
import torch

# Load the processed legal code data
@st.cache_data
def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Initialize the sentence transformer model
@st.cache_resource
def load_model():
    return SentenceTransformer('distiluse-base-multilingual-cased-v1')
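
# Cache the chunk embeddings so they are computed once per session rather
# than on every question. A sketch under one assumption: the leading
# underscore on _model tells Streamlit's cache to skip hashing that argument.
@st.cache_resource
def embed_chunks(_model, texts):
    return _model.encode(texts, convert_to_tensor=True)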

def search_relevant_chunks(query, chunks, model, top_k=3):
    # Embed only the query here; the chunk embeddings come from the cache.
    query_embedding = model.encode(query, convert_to_tensor=True)
    chunk_embeddings = embed_chunks(model, [chunk['text'] for chunk in chunks])

    cos_scores = util.cos_sim(query_embedding, chunk_embeddings)[0]
    top_results = torch.topk(cos_scores, k=min(top_k, len(chunks)))

    return [chunks[int(idx)] for idx in top_results.indices]

def main():
    st.title("Chatbot Prawny")

    # Load data and model
    data_file = "processed_kodeksy.json"
    if not os.path.exists(data_file):
        st.error(f"Plik {data_file} nie istnieje. Najpierw przetw贸rz dane kodeks贸w.")
        return

    chunks = load_data(data_file)
    model = load_model()
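
    # Warm the embedding cache at startup so the first question is fast.
    embed_chunks(model, [chunk['text'] for chunk in chunks])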

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # User input
    if prompt := st.chat_input("Zadaj pytanie dotyczące prawa..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Search for relevant chunks
        relevant_chunks = search_relevant_chunks(prompt, chunks, model)

        # Generate response
        response = "Oto co znalaz艂em w kodeksie:\n\n"
        for chunk in relevant_chunks:
            response += f"**{chunk['metadata']['nazwa']} - Artyku艂 {chunk['metadata']['article']}**\n"
            response += f"{chunk['text']}\n\n"

        # Display assistant response
        with st.chat_message("assistant"):
            st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})

    # Sidebar for additional options
    with st.sidebar:
        st.subheader("Opcje")
        if st.button("Wyczy艣膰 histori臋 czatu"):
            st.session_state.messages = []
            st.experimental_rerun()

        st.subheader("Informacje o bazie danych")
        st.write(f"Liczba chunk贸w: {len(chunks)}")
        st.write(f"Przyk艂adowy chunk:")
        st.json(chunks[0] if chunks else {})

if __name__ == "__main__":
    main()