Spaces:

KaiserML
/

Demo-Sci-Rhetoric-Classifier

Sleeping

App Files Files Community

Demo-Sci-Rhetoric-Classifier / app.py

Corran

Update app.py

32b3519 verified 7 months ago

raw

history blame

2.6 kB

	import csv
	import html

	import gradio as gr
	import numpy as np
	from datasets import load_dataset
	from sentence_transformers import SentenceTransformer
	from sentencex import segment
	from usearch.index import Index
	from usearch.index import search, MetricKind, Matches, BatchMatches

	# Static document head and tooltip stylesheet that every rendered result
	# starts with. A ``.tooltip`` container holds the visible text plus a hidden
	# ``.tooltiptext`` child that fades in while the container is hovered.
	#
	# Fixes relative to the previous stylesheet:
	#   * ``width: "100%"`` was a quoted (invalid) CSS value and was ignored.
	#   * With a real 100% width the box is anchored at ``left: 0`` so it stays
	#     inside the container instead of starting at its midpoint.
	#   * The arrow sits above the box (``bottom: 100%``), so its *bottom* border
	#     must be the coloured one for it to point up at the hovered text.
	HTML_Output = """<html><head><style>/* Tooltip container */
	.tooltip {
	position: relative;
	width: 600px;
	display: inline-block;
	border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
	}

	/* Tooltip text */
	.tooltip .tooltiptext {
	visibility: hidden;
	width: 100%;
	background-color: #555;
	color: #34e1eb;
	text-align: center;
	padding: 5px 0;
	border-radius: 6px;

	/* Position the tooltip text */
	position: absolute;
	z-index: 1;
	top: 125%;
	left: 0;

	/* Fade in tooltip */
	opacity: 0;
	transition: opacity 0.3s;
	}

	/* Tooltip arrow */
	.tooltip .tooltiptext::before {
	content: "";
	position: absolute;
	bottom: 100%;
	left: 50%;
	margin-left: -5px;
	border-width: 5px;
	border-style: solid;
	border-color: transparent transparent #555 transparent;
	}

	/* Show the tooltip text when you mouse over the tooltip container */
	.tooltip:hover .tooltiptext {
	visibility: visible;
	opacity: 1;
	}</style></head><body>"""

	# Sentence-embedding model; it is also used by get_matches() to embed the
	# user's sentences.
	# NOTE(review): trust_remote_code=True is passed through to the model
	# loader - confirm the model repository is trusted.
	model = SentenceTransformer(
	    "Corran/SciGenNomicEmbed",
	    trust_remote_code=True,
	)

	# Rhetoric-function labels: the "rhetoric_function" column of the "train"
	# split, materialised as a plain list and embedded once at module load.
	rf = list(
	    load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"]
	)
	rf_emb = model.encode(rf)


	def get_matches(inputs, top_k=3):
	    """Find the closest rhetoric-function labels for each input sentence.

	    Args:
	        inputs: Iterable of sentence strings.
	        top_k: Number of label matches requested per sentence (default 3,
	            the value that used to be hard-coded).

	    Returns:
	        A list with one entry per sentence. Each entry is a list of
	        ``(label, distance)`` tuples, where ``label`` is looked up in the
	        module-level ``rf`` list and ``distance`` is the ``MetricKind.L2sq``
	        distance rounded to two decimals and converted to ``str``.
	    """
	    sentences = list(inputs)
	    if not sentences:
	        # Nothing to embed; skip the model call entirely.
	        return []

	    # Embed every sentence in a single call so that batch_size actually
	    # batches, instead of invoking the model once per sentence.
	    embeddings = model.encode(sentences, batch_size=128)

	    paragraph_matches = []
	    for embedding in embeddings:
	        # Exact (brute-force) search of this sentence against the label embeddings.
	        matches = search(rf_emb, embedding, top_k, MetricKind.L2sq, exact=True)
	        paragraph_matches.append(
	            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
	        )

	    return paragraph_matches


	def return_rf_scores(abstract):
	    """Render an abstract as HTML with a rhetoric-function tooltip per sentence.

	    The text is split with ``segment("en", ...)``, each sentence is matched
	    with ``get_matches``, and every sentence is wrapped in a ``.tooltip``
	    div whose hidden ``.tooltiptext`` span lists one ``label : distance``
	    line per match.

	    Args:
	        abstract: Raw text entered by the user; ``None`` is treated as empty.

	    Returns:
	        A single HTML string: ``HTML_Output``, one tooltip div per sentence,
	        then the closing ``</body></html>`` tags.
	    """
	    sentences = list(segment("en", abstract or ""))
	    matches = get_matches(sentences)

	    parts = [HTML_Output]
	    for sentence, sentence_matches in zip(sentences, matches):
	        # Labels and user text are escaped so they cannot inject markup.
	        tooltip = "\n".join(
	            f"{html.escape(str(label))} : {html.escape(str(distance))}<br>"
	            for label, distance in sentence_matches
	        )
	        parts.append(
	            f'<div class="tooltip">{html.escape(sentence)}\n'
	            f'<span class="tooltiptext">{tooltip}</span>\n'
	            "</div><br>"
	        )

	    parts.append("</body></html>")
	    return "".join(parts)

	# Example rows read from the tab-separated file "examples.tsv".
	# newline="" lets the csv module handle line endings inside quoted fields
	# itself; the encoding is pinned so parsing does not depend on the locale.
	with open("examples.tsv", "r", newline="", encoding="utf-8") as ex:
	    examples = list(csv.reader(ex, delimiter="\t", quotechar='"'))


	# NOTE(review): `examples` is loaded above but the interface is still given
	# a single empty example - confirm whether `examples=examples` was intended
	# (that depends on the column layout of examples.tsv).
	demo = gr.Interface(
	    fn=return_rf_scores,
	    inputs="text",
	    outputs="html",
	    examples=[""],
	)
	demo.launch()