import gradio as gr
import requests
import os
import re

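# Gradio app: semantic search over an uploaded .txt file using the NASA-SMD
# sentence-transformer model via the Hugging Face Inference API. The token is
# read from the API_TOKEN environment variable (e.g. a Space secret).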
API_TOKEN = os.getenv('API_TOKEN')
API_URL = "https://api-inference.huggingface.co/models/nasa-impact/nasa-smd-ibm-st-v2"
headers = {"Authorization": f"Bearer {API_TOKEN}"}

def query_similarity(source_sentence, sentences):
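    # Call the Inference API's sentence-similarity task: it scores the source
    # sentence against each candidate sentence and returns one float per
    # candidate, e.g. [0.71, 0.42, 0.93].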
    payload = {
        "inputs": {
            "source_sentence": source_sentence,
            "sentences": sentences
        }
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()  # surface HTTP errors (bad token, model still loading, etc.)
    return response.json()

def format_output(scores, sentences):
    # The sentence-similarity endpoint returns one score per candidate, so pair
    # each chunk with its score and sort best-first.
    results = sorted(zip(sentences, scores), key=lambda x: x[1], reverse=True)
    formatted_results = []
    for sentence, score in results:
        formatted_results.append(f"Sentence: {sentence}, Score: {score:.4f}")
    return "\n".join(formatted_results)

def split_into_chunks(text, chunk_size=100):
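    # Greedily pack whole sentences into chunks of at most ~chunk_size words,
    # so long documents are scored chunk-by-chunk instead of as one huge input.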
    sentences = re.split(r'(?<=[.!?]) +', text)  # Split text into sentences
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence.split())
        # Start a new chunk only if the current one is non-empty, so an
        # oversized first sentence does not produce an empty leading chunk.
        if current_chunk and current_length + sentence_length > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

def semantic_search(query, file):
    if file is None:
        return "Please upload a .txt file."
    # gr.File provides a path to the uploaded file (or an object exposing .name
    # in older Gradio versions), so read it from disk rather than calling .read().
    file_path = file if isinstance(file, str) else file.name
    with open(file_path, 'r', encoding='utf-8') as f:
        document = f.read()
    chunks = split_into_chunks(document)
    scores = query_similarity(query, chunks)
    return format_output(scores, chunks)

# Define Gradio interface
iface = gr.Interface(
    fn=semantic_search,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your query here..."),
        gr.File(file_types=['txt'], label="Upload a .txt file")
    ],
    outputs="text",
    title="Document Semantic Search",
    description="Input a query and upload a document (.txt) to find the most semantically similar paragraphs or sentences."
)

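# Start the Gradio server; on Hugging Face Spaces this is the app's entry point.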
iface.launch()