File size: 4,164 Bytes
736842d
b1a798e
736842d
 
b1a798e
 
13778dd
 
 
 
 
 
 
f9e10ad
 
 
 
 
 
 
 
13778dd
f9e10ad
13778dd
 
 
 
 
 
 
 
 
 
 
736842d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ae17c8
340cc83
 
 
3ae17c8
340cc83
 
 
3ae17c8
 
 
1cb0871
3ae17c8
1cb0871
3ae17c8
 
41b5bdf
1cb0871
3ae17c8
1cb0871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ae17c8
1cb0871
 
 
 
 
3ceb12a
 
 
 
 
 
1cb0871
 
 
 
 
093848b
 
41b5bdf
 
 
5e4315c
 
 
 
41b5bdf
5e4315c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import weaviate
import streamlit as st
from weaviate.embedded import EmbeddedOptions
from weaviate import Client
import pandas as pd  # <-- Add this import
from io import StringIO  # <-- Add this import
import pandas as pd

def hybrid_search_weaviate(client, selected_class, query, limit=100):
    """
    Run a hybrid search against one Weaviate class.

    Args:
        client: Weaviate client exposing ``schema.get`` and ``query.get``.
        selected_class: Name of the class to search.
        query: Free-text search string passed to the hybrid operator.
        limit: Maximum number of objects to return (defaults to 100).

    Returns:
        A list of dictionaries, one per matching object, keyed by property
        name. The list is empty when the class has no properties or when
        nothing matches.

    Raises:
        RuntimeError: If Weaviate answers the query with GraphQL errors.
    """
    # A GraphQL Get query has to name the properties it wants back, so the
    # property list is read from the class schema instead of being left empty.
    class_schema = client.schema.get(selected_class)
    property_names = [
        prop["name"] for prop in class_schema.get("properties") or []
    ]
    if not property_names:
        return []

    response = (
        client.query.get(selected_class, property_names)
        .with_hybrid(query=query)
        .with_limit(limit)
        .do()
    )

    # Surface query errors instead of silently returning an empty result.
    if response.get("errors"):
        raise RuntimeError(f"Hybrid search failed: {response['errors']}")

    # Results are nested under data -> Get -> <class name>.
    hits = response.get("data", {}).get("Get", {}).get(selected_class)
    return hits or []

def convert_to_tapas_format(data):
    """
    Turn a list of record dictionaries into a header-plus-rows table.

    The first inner list holds the column names; every following inner list
    holds the values of one record in the same column order.
    """
    frame = pd.DataFrame(data)
    header_row = frame.columns.tolist()
    body_rows = frame.to_numpy().tolist()
    return [header_row, *body_rows]

def initialize_weaviate_client():
    """Build and return a Weaviate client configured with default EmbeddedOptions."""
    embedded_config = EmbeddedOptions()
    return weaviate.Client(embedded_options=embedded_config)

def class_exists(client, class_name):
    """
    Report whether a class named ``class_name`` is present in the schema.

    Args:
        client: Weaviate client exposing ``schema.get()``.
        class_name: Exact class name to look for.

    Returns:
        True if the schema lists a class with that name, otherwise False.
        False is also returned when the schema cannot be fetched, keeping
        the original best-effort contract of never raising.
    """
    try:
        schema = client.schema.get()
    except Exception:
        # Best-effort probe: an unreachable or failing server is reported as
        # "not found" rather than raised. Unlike a bare ``except`` this does
        # not swallow KeyboardInterrupt/SystemExit.
        return False

    return any(
        cls.get("class") == class_name for cls in schema.get("classes") or []
    )

def map_dtype_to_weaviate(dtype):
    """
    Translate a dtype (or its name) into a Weaviate data type name.

    The decision is made by substring match on ``str(dtype)``; anything that
    is not recognised as int, float or bool falls back to ``"string"``.
    """
    dtype_name = str(dtype)
    # Checked in this order; the first marker found in the name wins.
    marker_to_type = (
        ("int", "int"),
        ("float", "number"),
        ("bool", "boolean"),
    )
    for marker, weaviate_type in marker_to_type:
        if marker in dtype_name:
            return weaviate_type
    return "string"

def create_new_class_schema(client, class_name, class_description):
    """
    Create a class with no properties in Weaviate and report the outcome.

    A Streamlit success message is shown when creation works; any exception
    is caught and shown as a Streamlit error instead of being raised.
    """
    schema_payload = {
        "classes": [
            {
                "class": class_name,
                "description": class_description,
                "properties": [],
            }
        ]
    }
    try:
        client.schema.create(schema_payload)
        st.success(f"Class {class_name} created successfully!")
    except Exception as creation_error:
        st.error(f"Error creating class: {creation_error}")
        
def ingest_data_to_weaviate(client, csv_file, selected_class):
    """
    Load an uploaded CSV into Weaviate as objects of ``selected_class``.

    If the class has no properties yet, one property per CSV column is
    created, typed from the column's pandas dtype. Otherwise the CSV columns
    must match the existing property names exactly.

    Args:
        client: Weaviate client used for schema and object creation.
        csv_file: Binary file-like object whose ``read()`` returns UTF-8
            encoded CSV bytes.
        selected_class: Name of the target Weaviate class.

    Returns:
        The parsed DataFrame, or None when the CSV is empty or its columns do
        not match the schema of the selected class.
    """
    # Cap on how many per-record errors are rendered, so a badly formed file
    # does not flood the page with one message per row.
    max_errors_shown = 5

    # "utf-8-sig" decodes plain UTF-8 too, but strips a leading byte-order
    # mark that would otherwise end up inside the first column name.
    raw_text = csv_file.read().decode("utf-8-sig")
    try:
        dataframe = pd.read_csv(StringIO(raw_text))
    except pd.errors.EmptyDataError:
        st.error("The uploaded CSV is empty. Please upload a CSV with a header row.")
        return None

    # Fetch the schema for the selected class
    class_schema = get_class_schema(client, selected_class)

    if not class_schema or not class_schema["properties"]:
        # Empty schema: derive one property per CSV column.
        for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)],
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Property might already exist, so we can continue
                pass
    else:
        # Existing schema: the CSV must provide exactly the same columns.
        schema_columns = [prop["name"] for prop in class_schema["properties"]]
        if set(dataframe.columns) != set(schema_columns):
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return None

    # Ingest row by row, collecting failures so the final message is honest.
    failures = []
    for record in dataframe.to_dict(orient="records"):
        # Empty CSV cells are parsed as NaN; leave those properties unset
        # instead of sending NaN as a property value.
        payload = {
            key: value for key, value in record.items() if pd.notna(value)
        }
        try:
            client.data_object.create(payload, selected_class)
        except Exception as e:
            failures.append(e)

    for error in failures[:max_errors_shown]:
        st.error(f"Error ingesting record: {error}")
    hidden_errors = len(failures) - max_errors_shown
    if hidden_errors > 0:
        st.error(f"... and {hidden_errors} more records failed to ingest.")

    # Only claim success when every record was stored.
    if failures:
        st.warning(
            f"{len(failures)} of {len(dataframe)} records could not be ingested "
            f"into the class '{selected_class}'."
        )
    else:
        st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")

    # Display the first few rows of the dataframe as a preview
    st.write(dataframe.head())

    return dataframe

def get_class_schema(client, class_name):
    """
    Look up the schema entry of a single class.

    Fetches the full schema from the client and returns the first class
    dictionary whose ``"class"`` value equals ``class_name``. Returns None
    when no class matches or when fetching raises
    ``SchemaValidationException``.
    """
    try:
        all_classes = client.schema.get()["classes"]
        return next(
            (entry for entry in all_classes if entry["class"] == class_name),
            None,
        )
    except weaviate.exceptions.SchemaValidationException:
        return None