Spaces:
Build error
Build error
File size: 3,879 Bytes
736842d b1a798e 736842d b1a798e 13778dd 736842d 3ae17c8 340cc83 3ae17c8 340cc83 3ae17c8 1cb0871 3ae17c8 1cb0871 3ae17c8 41b5bdf 1cb0871 3ae17c8 1cb0871 3ae17c8 1cb0871 41b5bdf 5e4315c 41b5bdf 5e4315c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import weaviate
import streamlit as st
from weaviate.embedded import EmbeddedOptions
from weaviate import Client
import pandas as pd # <-- Add this import
from io import StringIO # <-- Add this import
import pandas as pd
def hybrid_search_weaviate(client, selected_class, query):
    """
    Perform a hybrid search on Weaviate using the provided class and query.

    Args:
        client: Weaviate client exposing the ``schema`` and ``query`` APIs.
        selected_class: Name of the class to search.
        query: Free-text search string.

    Returns:
        A list of dictionaries, one per matching object (at most 100).
        An empty list is returned when the class has no properties or
        nothing matches.

    Raises:
        RuntimeError: If the GraphQL response reports errors.
    """
    # A GraphQL ``Get`` query has to name the properties it returns, so look
    # them up from the class schema instead of hard-coding them.
    # NOTE(review): assumes all properties are primitive (no cross-references,
    # which would need a sub-selection) -- confirm against the schemas in use.
    class_schema = client.schema.get(selected_class) or {}
    property_names = [
        prop["name"] for prop in class_schema.get("properties") or []
    ]
    if not property_names:
        return []

    response = (
        client.query.get(selected_class, property_names)
        .with_hybrid(query=query)
        .with_limit(100)  # Limit to 100 results for now
        .do()
    )
    if response.get("errors"):
        raise RuntimeError(f"Hybrid search failed: {response['errors']}")

    # Only one class is queried, so take its single result list rather than
    # depending on the exact spelling of the class name in the response key.
    hits_by_class = (response.get("data") or {}).get("Get") or {}
    return next(iter(hits_by_class.values()), None) or []
def convert_to_tapas_format(data):
    """
    Turn a list of row dictionaries into a header-first table.

    The first inner list holds the column names; every following inner list
    holds the values of one row, in column order.
    """
    frame = pd.DataFrame(data)
    header = frame.columns.tolist()
    rows = frame.values.tolist()
    return [header, *rows]
def initialize_weaviate_client():
    """Build a Weaviate client configured with default ``EmbeddedOptions``."""
    embedded = EmbeddedOptions()
    return weaviate.Client(embedded_options=embedded)
def class_exists(client, class_name):
    """
    Report whether ``class_name`` is present in the Weaviate schema.

    Args:
        client: Weaviate client exposing ``schema.get()``.
        class_name: Name of the class to look for (compared exactly).

    Returns:
        True when a class with that name is listed in the schema, False when
        it is not listed or the schema could not be retrieved.
    """
    try:
        schema = client.schema.get()
    except Exception:
        # Best-effort check: a schema that cannot be fetched is reported as
        # "class not found" instead of crashing the caller. Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return False
    known_classes = (schema or {}).get("classes") or []
    return any(entry.get("class") == class_name for entry in known_classes)
def map_dtype_to_weaviate(dtype):
    """
    Map a pandas/NumPy dtype to a Weaviate primitive data type name.

    Args:
        dtype: A dtype object (or anything pandas accepts as a dtype, such as
            the string ``"int64"``).

    Returns:
        ``"boolean"`` for boolean dtypes, ``"int"`` for integer dtypes,
        ``"number"`` for floating-point dtypes, and ``"string"`` otherwise.
    """
    # Use pandas' dtype predicates rather than substring matching on the
    # dtype name: name matching misses the nullable extension dtypes
    # ("Int64", "Float64") and wrongly treats "interval[int64]" as an integer.
    dtype_checks = pd.api.types
    if dtype_checks.is_bool_dtype(dtype):
        return "boolean"
    if dtype_checks.is_integer_dtype(dtype):
        return "int"
    if dtype_checks.is_float_dtype(dtype):
        return "number"
    return "string"
def create_new_class_schema(client, class_name, class_description):
    """
    Register a new, property-less class in the Weaviate schema.

    The outcome is reported through Streamlit: a success banner when the
    class is created, an error banner carrying the exception text otherwise.
    """
    schema_payload = {
        "classes": [
            {
                "class": class_name,
                "description": class_description,
                "properties": [],
            }
        ]
    }
    try:
        client.schema.create(schema_payload)
        st.success(f"Class {class_name} created successfully!")
    except Exception as err:
        st.error(f"Error creating class: {err}")
def ingest_data_to_weaviate(client, csv_file, selected_class):
    """
    Load an uploaded CSV into the Weaviate class ``selected_class``.

    When the class has no properties yet, one property is created per CSV
    column (typed via ``map_dtype_to_weaviate``). Otherwise the CSV columns
    must match the existing property names exactly; on a mismatch an error is
    shown through Streamlit and nothing is ingested.

    Args:
        client: Weaviate client.
        csv_file: Binary file-like object whose ``read()`` returns UTF-8
            encoded CSV bytes.
        selected_class: Name of the target class.

    Returns:
        None. Progress and a preview of the data are written to Streamlit.
    """
    # Read the CSV data
    raw_text = csv_file.read().decode("utf-8")
    dataframe = pd.read_csv(StringIO(raw_text))

    # Fetch the schema for the selected class; tolerate a missing class or a
    # class entry without a "properties" key.
    class_schema = get_class_schema(client, selected_class)
    existing_properties = (class_schema or {}).get("properties") or []

    if not existing_properties:
        # If the schema is empty, create it based on the CSV columns
        for column_name, data_type in dataframe.dtypes.items():
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)],
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Property might already exist, so we can continue
                pass
    else:
        # If the schema is not empty, compare it with the CSV columns
        schema_columns = {prop["name"] for prop in existing_properties}
        if set(dataframe.columns) != schema_columns:
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return

    # Ingest the data into Weaviate one row at a time: ``data_object.create``
    # takes a single object, not a list of records.
    for record in dataframe.to_dict(orient="records"):
        # Empty CSV cells are read as NaN, which is not a valid JSON value;
        # leave those properties out of the object instead.
        properties = {
            key: value for key, value in record.items() if pd.notna(value)
        }
        client.data_object.create(properties, selected_class)

    # Display a preview of the ingested data
    st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")
    st.write(dataframe.head())  # Display the first few rows of the dataframe as a preview
def get_class_schema(client, class_name):
    """
    Look up the schema entry of a single class.

    Returns the first entry in the schema's "classes" list whose "class"
    field equals ``class_name``, or None when there is no such entry or a
    ``SchemaValidationException`` is raised while reading the schema.
    """
    try:
        all_classes = client.schema.get()["classes"]
        return next(
            (entry for entry in all_classes if entry["class"] == class_name),
            None,
        )
    except weaviate.exceptions.SchemaValidationException:
        return None