Spaces:

jskinner215
/

TAPAS_WTQ_Chunking

Build error

File size: 3,879 Bytes

import weaviate
import streamlit as st
from weaviate.embedded import EmbeddedOptions
from weaviate import Client
import pandas as pd  # <-- Add this import
from io import StringIO  # <-- Add this import
import pandas as pd

def hybrid_search_weaviate(client, selected_class, query, limit=100):
    """
    Perform a hybrid search on Weaviate using the provided class and query.

    The search goes through the client's GraphQL query builder
    (``client.query.get(...).with_hybrid(...)``). The properties to return
    are read from the class definition in the schema, because a ``Get``
    query has to name the fields it wants back.

    Args:
        client: Weaviate client exposing ``schema.get`` and ``query.get``.
        selected_class: Name of the class to search.
        query: Free-text search string.
        limit: Maximum number of objects to return (defaults to 100).

    Returns:
        A list of dictionaries, one per matching object, keyed by property
        name. Empty when the class has no properties or nothing matches.

    Raises:
        RuntimeError: If the response carries an ``errors`` entry.
    """
    class_definition = client.schema.get(selected_class)
    property_names = [
        prop["name"] for prop in class_definition.get("properties") or []
    ]
    if not property_names:
        # Nothing could be selected, so there is nothing to search for.
        return []

    response = (
        client.query.get(selected_class, property_names)
        .with_hybrid(query=query)
        .with_limit(limit)
        .do()
    )

    if response.get("errors"):
        raise RuntimeError(f"Weaviate hybrid search failed: {response['errors']}")

    # Only one class was queried, so the "Get" payload holds a single entry.
    # Reading it by position avoids depending on how the class name's casing
    # was normalised on the way to the server.
    matches_by_class = (response.get("data") or {}).get("Get") or {}
    return next(iter(matches_by_class.values()), None) or []

def convert_to_tapas_format(data):
    """
    Turn a list of record dictionaries into a header-plus-rows table.

    The records are loaded into a DataFrame; the first row of the result
    holds the column names and each following row holds one record's values
    in the same column order.
    """
    frame = pd.DataFrame(data)
    header = frame.columns.tolist()
    rows = frame.values.tolist()
    return [header, *rows]

def initialize_weaviate_client():
    """Create a Weaviate client configured with default ``EmbeddedOptions``."""
    embedded = EmbeddedOptions()
    return weaviate.Client(embedded_options=embedded)

def class_exists(client, class_name):
    """
    Report whether ``class_name`` is present in the Weaviate schema.

    The whole schema is fetched with ``client.schema.get()`` and its
    ``"classes"`` list is scanned for an entry whose ``"class"`` equals
    ``class_name`` exactly.

    Args:
        client: Weaviate client exposing ``schema.get``.
        class_name: Name of the class to look for.

    Returns:
        True when a matching class entry is found. False when it is not, or
        when the schema cannot be fetched at all.
    """
    try:
        schema = client.schema.get()
    except Exception:
        # Best-effort check: a failed schema fetch is reported as "not
        # found" rather than raised. KeyboardInterrupt and SystemExit still
        # propagate, which a bare ``except:`` would have swallowed.
        return False

    classes = schema.get("classes") or []
    return any(entry.get("class") == class_name for entry in classes)

def map_dtype_to_weaviate(dtype):
    """
    Map a column dtype to the data type name used in a Weaviate property.

    The dtype is matched by substring on its string form. The comparison is
    case-insensitive so that pandas' nullable extension dtypes, whose names
    are capitalised (``Int64``, ``UInt8``, ``Float64``), map the same way as
    their NumPy counterparts instead of falling through to ``"string"``.

    Args:
        dtype: A dtype object or dtype name; only ``str(dtype)`` is used.

    Returns:
        ``"int"``, ``"number"`` or ``"boolean"`` for integer, float and bool
        dtypes respectively, and ``"string"`` for everything else.
    """
    dtype_name = str(dtype).lower()
    # Checked in order; the first marker found in the name wins.
    for marker, weaviate_type in (
        ("int", "int"),
        ("float", "number"),
        ("bool", "boolean"),
    ):
        if marker in dtype_name:
            return weaviate_type
    return "string"

def create_new_class_schema(client, class_name, class_description):
    """
    Register a new, property-less class in the Weaviate schema.

    The outcome is reported through Streamlit: a success message when the
    class is created, or an error message carrying the exception text when
    anything in the attempt fails.
    """
    new_class = {"class": class_name, "description": class_description, "properties": []}
    schema_payload = {"classes": [new_class]}
    try:
        client.schema.create(schema_payload)
        st.success(f"Class {class_name} created successfully!")
    except Exception as e:
        st.error(f"Error creating class: {e}")
        
def ingest_data_to_weaviate(client, csv_file, selected_class):
    """
    Load an uploaded CSV file into the Weaviate class ``selected_class``.

    Steps:
      1. Decode the upload as UTF-8 and parse it into a DataFrame.
      2. If the class has no properties yet, create one property per CSV
         column, typed via ``map_dtype_to_weaviate``. Otherwise require the
         CSV columns to match the existing property names exactly; on a
         mismatch an error is shown and nothing is ingested.
      3. Create one Weaviate object per CSV row.
      4. Show a confirmation and a preview of the data in Streamlit.

    Args:
        client: Weaviate client.
        csv_file: Binary file-like object whose ``read()`` returns the CSV
            as UTF-8 encoded bytes.
        selected_class: Name of the target class.

    Returns:
        None.
    """
    # Read the CSV data
    raw_text = csv_file.read().decode("utf-8")
    dataframe = pd.read_csv(StringIO(raw_text))

    # Fetch the schema for the selected class
    class_schema = get_class_schema(client, selected_class)

    # If the schema is empty, create it based on the CSV columns.
    # ``.get`` is used so a class entry without a "properties" key is treated
    # as empty instead of raising KeyError.
    if not class_schema or not class_schema.get("properties"):
        for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)]
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Property might already exist, so we can continue
                pass
    else:
        # If the schema is not empty, compare it with the CSV columns
        schema_columns = [prop["name"] for prop in class_schema["properties"]]
        if set(dataframe.columns) != set(schema_columns):
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return

    # Ingest the data into Weaviate, one object per CSV row:
    # ``data_object.create`` takes a single object, not a list of records.
    for record in dataframe.to_dict(orient="records"):
        # Empty CSV cells are parsed as NaN, which is not valid JSON; leave
        # those properties out of the object instead of sending NaN.
        properties = {
            key: value for key, value in record.items() if pd.notna(value)
        }
        client.data_object.create(properties, selected_class)

    # Display a preview of the ingested data
    st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")
    st.write(dataframe.head())  # Display the first few rows of the dataframe as a preview


def get_class_schema(client, class_name):
    """
    Look up the schema entry for ``class_name``.

    Fetches the full schema from the client and returns the first entry in
    its ``"classes"`` list whose ``"class"`` value equals ``class_name``.
    Returns None when no entry matches, or when the lookup raises a
    ``SchemaValidationException``.
    """
    try:
        candidates = client.schema.get()["classes"]
        return next(
            (entry for entry in candidates if entry["class"] == class_name),
            None,
        )
    except weaviate.exceptions.SchemaValidationException:
        return None