Spaces:
Build error
Build error
File size: 4,164 Bytes
736842d b1a798e 736842d b1a798e 13778dd f9e10ad 13778dd f9e10ad 13778dd 736842d 3ae17c8 340cc83 3ae17c8 340cc83 3ae17c8 1cb0871 3ae17c8 1cb0871 3ae17c8 41b5bdf 1cb0871 3ae17c8 1cb0871 3ae17c8 1cb0871 3ceb12a 1cb0871 093848b 41b5bdf 5e4315c 41b5bdf 5e4315c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import weaviate
import streamlit as st
from weaviate.embedded import EmbeddedOptions
from weaviate import Client
import pandas as pd # <-- Add this import
from io import StringIO # <-- Add this import
import pandas as pd
def hybrid_search_weaviate(client, selected_class, query):
    """
    Perform a hybrid search on Weaviate using the provided class and query.

    Args:
        client: Weaviate client exposing the v3-style ``schema`` and ``query``
            attributes.
        selected_class: Name of the class to search in.
        query: Free-text search string.

    Returns:
        The results as a list of dictionaries (at most 100). The list is empty
        when the class has no properties or nothing matched.

    Raises:
        RuntimeError: If the response carries a GraphQL ``errors`` entry.
        Exceptions raised by the client itself (e.g. when the class lookup
        fails) are propagated unchanged.
    """
    # A GraphQL "Get" query has to name the properties it selects, so read
    # them from the class schema rather than sending an empty selection.
    class_schema = client.schema.get(selected_class) or {}
    property_names = [
        prop["name"] for prop in class_schema.get("properties") or []
    ]
    if not property_names:
        return []

    # Perform the hybrid search
    response = (
        client.query.get(selected_class, property_names)
        .with_hybrid(query=query)
        .with_limit(100)  # Limit to 100 results for now
        .do()
    )
    if response.get("errors"):
        raise RuntimeError(f"Hybrid search failed: {response['errors']}")

    # The objects sit under data -> Get -> <class name>. Only one class was
    # queried, so take the single entry instead of indexing by
    # selected_class, whose capitalisation may differ from the response key.
    matches_by_class = (response.get("data") or {}).get("Get") or {}
    return next(iter(matches_by_class.values()), None) or []
def convert_to_tapas_format(data):
    """
    Turn Weaviate records (a list of dictionaries) into a TAPAS-style table.

    The result is a list of lists: the first inner list holds the column
    names, and every following inner list holds the values of one record.
    """
    frame = pd.DataFrame(data)
    header_row = frame.columns.tolist()
    body_rows = frame.values.tolist()
    return [header_row, *body_rows]
def initialize_weaviate_client():
    """Build and return a Weaviate client configured with default ``EmbeddedOptions``."""
    embedded = EmbeddedOptions()
    return weaviate.Client(embedded_options=embedded)
def class_exists(client, class_name):
    """
    Report whether a class named ``class_name`` can be fetched from Weaviate.

    Args:
        client: Weaviate client exposing the v3-style ``schema`` attribute.
        class_name: Name of the class to look up.

    Returns:
        True when the class schema could be fetched, False when the lookup
        raised an error.
    """
    try:
        # schema.get(<name>) fetches a single class; the lookup failing is
        # treated as "class does not exist".
        client.schema.get(class_name)
    except Exception:
        # Deliberately broad (best-effort probe), but unlike a bare
        # ``except:`` it lets KeyboardInterrupt/SystemExit propagate.
        return False
    return True
def map_dtype_to_weaviate(dtype):
    """
    Map a column dtype (or its name) to a Weaviate data type name.

    The decision is made on the lower-cased text of ``str(dtype)``, so pandas
    nullable extension dtypes such as ``Int64``/``Float64`` are recognised the
    same way as their NumPy counterparts.

    Args:
        dtype: A dtype object or dtype name; anything ``str()`` accepts.

    Returns:
        ``"int"``, ``"number"``, ``"boolean"``, or ``"string"`` as fallback.
    """
    dtype_name = str(dtype).lower()
    # Order matters: the first matching marker wins, as in an if/elif chain.
    for marker, weaviate_type in (
        ("int", "int"),
        ("float", "number"),
        ("bool", "boolean"),
    ):
        if marker in dtype_name:
            return weaviate_type
    return "string"
def create_new_class_schema(client, class_name, class_description):
    """
    Create a new class in Weaviate and report the outcome in Streamlit.

    The class is created with the given name and description and an empty
    property list. A success message is shown when creation goes through;
    any exception is shown as an error message instead of being raised.
    """
    new_class = {
        "class": class_name,
        "description": class_description,
        "properties": [],
    }
    schema_payload = {"classes": [new_class]}
    try:
        client.schema.create(schema_payload)
        st.success(f"Class {class_name} created successfully!")
    except Exception as error:
        st.error(f"Error creating class: {error}")
def ingest_data_to_weaviate(client, csv_file, selected_class):
    """
    Load an uploaded CSV file into Weaviate as objects of ``selected_class``.

    When the class has no properties yet, one property per CSV column is
    created, typed via ``map_dtype_to_weaviate``. Otherwise the CSV columns
    must match the existing property names exactly (order is ignored).

    Args:
        client: Weaviate client used for schema changes and object creation.
        csv_file: Binary file-like object; ``read()`` must return UTF-8 bytes.
        selected_class: Name of the class the rows are stored under.

    Returns:
        The parsed DataFrame, or None when the CSV columns do not match the
        class schema (an error is shown in Streamlit in that case).
    """
    # Read the CSV data. "utf-8-sig" decodes plain UTF-8 unchanged but drops a
    # leading byte-order mark, which would otherwise end up in the first
    # column name.
    raw_text = csv_file.read().decode("utf-8-sig")
    dataframe = pd.read_csv(StringIO(raw_text))

    # Fetch the schema for the selected class. Tolerate a missing class and a
    # class entry without a "properties" key.
    class_schema = get_class_schema(client, selected_class)
    existing_properties = (class_schema or {}).get("properties") or []

    if not existing_properties:
        # If the schema is empty, create it based on the CSV columns
        for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)],
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Best effort: a rejected property definition must not abort
                # the whole upload.
                pass
    else:
        # If the schema is not empty, compare it with the CSV columns
        schema_columns = {prop["name"] for prop in existing_properties}
        if set(dataframe.columns) != schema_columns:
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return None

    # Empty CSV cells are parsed as NaN; leave those properties out of the
    # object instead of sending NaN as a property value.
    records = [
        {name: value for name, value in row.items() if pd.notna(value)}
        for row in dataframe.to_dict(orient="records")
    ]

    # Ingest the data into Weaviate, keeping going after individual failures
    failed_count = 0
    for record in records:
        try:
            client.data_object.create(record, selected_class)
        except Exception as e:
            failed_count += 1
            st.error(f"Error ingesting record: {e}")

    # Only claim success when every record was actually stored
    if failed_count:
        st.warning(f"{failed_count} of {len(records)} records could not be ingested into the class '{selected_class}'.")
    else:
        st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")

    # Display the first few rows of the dataframe as a preview
    st.write(dataframe.head())
    # Return the dataframe for preview
    return dataframe
def get_class_schema(client, class_name):
    """
    Look up the schema entry of a single class.

    Fetches the full schema from the client and returns the first entry whose
    ``"class"`` value equals ``class_name``. Returns None when there is no
    such entry or when a ``SchemaValidationException`` is raised.
    """
    try:
        all_classes = client.schema.get()["classes"]
        return next(
            (entry for entry in all_classes if entry["class"] == class_name),
            None,
        )
    except weaviate.exceptions.SchemaValidationException:
        return None