import csv
import html

import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentencex import segment
from usearch.index import Index
from usearch.index import search, MetricKind, Matches, BatchMatches
# Document header for the generated report: opens the HTML page and embeds the
# stylesheet for the hover tooltips. The string deliberately ends right after
# "<body>" so that per-sentence markup and the closing tags can be appended.
#
# Stylesheet notes:
#   * The tooltip width is an unquoted percentage; a quoted value such as
#     "100%" is not a valid CSS width and the declaration would be dropped.
#   * The tooltip spans the whole sentence box (left: 0, width: 100%), so no
#     fixed-pixel centring offset is needed.
#   * The arrow sits on the top edge of the tooltip (bottom: 100%), so its
#     *bottom* border carries the colour, making it point up at the sentence.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
  position: relative;
  width: 600px;
  display: inline-block;
  border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}
/* Tooltip text */
.tooltip .tooltiptext {
  visibility: hidden;
  width: 100%;
  background-color: #555;
  color: #34e1eb;
  text-align: center;
  padding: 5px 0;
  border-radius: 6px;
  /* Position the tooltip text */
  position: absolute;
  z-index: 1;
  top: 125%;
  left: 0;
  /* Fade in tooltip */
  opacity: 0;
  transition: opacity 0.3s;
}
/* Tooltip arrow */
.tooltip .tooltiptext::before {
  content: "";
  position: absolute;
  bottom: 100%;
  left: 50%;
  margin-left: -5px;
  border-width: 5px;
  border-style: solid;
  border-color: transparent transparent #555 transparent;
}
/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
  visibility: visible;
  opacity: 1;
}</style></head><body>"""
# Sentence-embedding model used for both the labels and the user's sentences.
# NOTE(review): trust_remote_code=True lets the loader run Python code shipped
# in the model repository -- only acceptable while that repository is trusted.
model = SentenceTransformer("Corran/SciGenNomicEmbed", trust_remote_code=True)

# Rhetorical-function labels, materialised as a plain list so they can be
# looked up by position later on.
rf = list(load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"])

# Embeddings of the labels; this is the dataset that sentences are searched against.
rf_emb = model.encode(rf)
def get_matches(inputs):
    """Find the closest rhetorical functions for every sentence.

    Each sentence is embedded with the module-level ``model`` and compared
    against ``rf_emb`` with an exact squared-L2 search for up to three
    neighbours.

    Args:
        inputs: Iterable of sentence strings.

    Returns:
        A list with one entry per input sentence, in input order. Each entry
        is a list of ``(rhetoric_function, distance)`` tuples, where
        ``distance`` is the match distance rounded to two decimals and
        converted to ``str``.
    """
    sentences = list(inputs)
    if not sentences:
        # Nothing to embed; avoids handing the model an empty batch.
        return []

    # Encode everything in one call so that batch_size really batches,
    # instead of running the model once per sentence.
    embeddings = model.encode(sentences, batch_size=128)

    paragraph_matches = []
    for embedding in embeddings:
        # One vector per query keeps the result a flat sequence of matches.
        nearest = search(rf_emb, embedding, 3, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[hit.key], str(round(hit.distance, 2))) for hit in nearest]
        )
    return paragraph_matches
def return_rf_scores(abstract):
    """Render an abstract as HTML with a rhetorical-function tooltip per sentence.

    The abstract is split into English sentences, each sentence is matched
    against the rhetorical-function labels via ``get_matches``, and every
    sentence becomes a ``.tooltip`` block whose hover text lists its matches
    as ``label : distance`` lines.

    Args:
        abstract: Free text entered by the user.

    Returns:
        A complete HTML document as a string (``HTML_Output`` header, one
        block per sentence, closing ``</body></html>``).
    """
    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # Escape everything interpolated into markup: abstracts routinely
        # contain "<", ">" or "&" (e.g. "p < 0.05"), which would otherwise
        # corrupt the page or inject tags.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {html.escape(str(distance))}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f'<div class="tooltip">{html.escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )
    parts.append("</body></html>")
    return "".join(parts)
# Example inputs offered in the UI, read from a tab-separated file that sits
# next to the app.
# NOTE(review): assumes the first column of each row is the abstract text that
# should be fed to the single text input -- confirm against examples.tsv.
examples = []
# newline="" is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly; the encoding is pinned so the result does
# not depend on the platform default.
with open("examples.tsv", "r", newline="", encoding="utf-8") as ex:
    for row in csv.reader(ex, delimiter="\t", quotechar='"'):
        if row:  # blank lines come back as empty rows; skip them
            examples.append(row)

demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    # Use the loaded rows; fall back to a single empty example when the file
    # has no usable rows.
    examples=examples or [""],
)
demo.launch()