import gradio as gr
from gradio_rich_textbox import RichTextbox
from PIL import Image
# pdf2image (with poppler) is assumed available: surya's run_ocr consumes PIL
# images, so PDF pages must be rendered first (see OCRProcessor.process_pdf).
from pdf2image import convert_from_path
from surya.ocr import run_ocr
from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor
from gradio_client import Client
from dotenv import load_dotenv
import cohere
import os
import re
import pandas as pd


title = "# Welcome to AyaTonic"
description = "Learn a New Language With Aya"

load_dotenv()
COHERE_API_KEY = os.getenv('CO_API_KEY')
SEAMLESSM4T = os.getenv('SEAMLESSM4T')
# lang_list.csv must provide a "name" column; it populates both language
# dropdowns in the UI below.
df = pd.read_csv("lang_list.csv")

producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above:"
formatinputstring = "\n\nThe above text is a learning aid. You must use rich text format to rewrite it, adding: 1. red color tags for nouns, 2. blue color tags for verbs, 3. green color tags for adjectives and adverbs:"

# Regex patterns for the color tags that formatinputstring asks the model to emit.
patterns = {
    "red": r'<span style="color: red;">(.*?)</span>',
    "blue": r'<span style="color: blue;">(.*?)</span>',
    "green": r'<span style="color: green;">(.*?)</span>',
}


class TaggedPhraseExtractor:
    def __init__(self, text=''):
        self.text = text
        self.patterns = {}

    def set_text(self, text):
        """Set the text to search within."""
        self.text = text

    def add_pattern(self, color, pattern):
        """Add a new color and its associated pattern."""
        self.patterns[color] = pattern

    def extract_phrases(self):
        """Extract phrases for all colors and patterns added."""
        return {color: re.findall(pattern, self.text) for color, pattern in self.patterns.items()}

    def print_phrases(self):
        """Extract phrases and print them, grouped by color."""
        matches = self.extract_phrases()
        for color, phrases in matches.items():
            print(f"Phrases with color {color}:")
            for phrase in phrases:
                print(f"- {phrase}")
            print()
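

# Convenience wrapper (sketch): wires the module-level color patterns into a
# TaggedPhraseExtractor in one call. Illustrative glue over the class above,
# not part of the original pipeline.
def extract_color_tagged_phrases(text):
    extractor = TaggedPhraseExtractor(text)
    for color, pattern in patterns.items():
        extractor.add_pattern(color, pattern)
    return extractor.extract_phrases()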


co = cohere.Client(COHERE_API_KEY)
# A single shared client for the SeamlessM4T space, reused by both audio helpers.
audio_client = Client(SEAMLESSM4T)


def process_audio_to_text(audio_path, inputlanguage="English"):
    """
    Convert audio input to text using the SeamlessM4T Gradio client.
    """
    result = audio_client.predict(
        audio_path,
        inputlanguage,
        inputlanguage,
        api_name="/s2tt"
    )
    print("Audio Result: ", result)
    return result[0]


def process_text_to_audio(text, translatefrom, translateto):
    """
    Convert text input to audio using the SeamlessM4T Gradio client.
    """
    result = audio_client.predict(
        text,
        translatefrom,
        translateto,
        api_name="/t2st"
    )
    return result[0]


class OCRProcessor:
    def __init__(self, langs=["en"]):
        self.langs = langs
        self.det_processor, self.det_model = load_det_processor(), load_det_model()
        self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()

    def process_image(self, image):
        """
        Run OCR on a single PIL image and return its OCR result.
        """
        predictions = run_ocr([image], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
        return predictions[0]

    def process_pdf(self, pdf_path):
        """
        Run OCR on a PDF and return one OCR result per page.

        run_ocr expects PIL images rather than a file path, so the pages are
        rendered to images first.
        """
        pages = convert_from_path(pdf_path)
        predictions = run_ocr(pages, [self.langs] * len(pages), self.det_model, self.det_processor, self.rec_model, self.rec_processor)
        return predictions
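

# Helper (sketch): flatten a surya OCR result into plain text. Assumes surya's
# OCRResult schema, where recognized lines live in .text_lines and each line
# carries its string in .text.
def ocr_result_to_text(ocr_result):
    return " ".join(line.text for line in ocr_result.text_lines)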


def process_input(image=None, file=None, audio=None, text="", translatefrom="English", translateto="English"):
    ocr_processor = OCRProcessor()
    final_text = text
    if image is not None:
        ocr_prediction = ocr_processor.process_image(image)
        final_text += " " + ocr_result_to_text(ocr_prediction)
    if file is not None:
        if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
            pil_image = Image.open(file)
            ocr_prediction = ocr_processor.process_image(pil_image)
            final_text += " " + ocr_result_to_text(ocr_prediction)
        elif file.name.lower().endswith('.pdf'):
            for page_prediction in ocr_processor.process_pdf(file.name):
                final_text += " " + ocr_result_to_text(page_prediction)
        else:
            final_text += "\nUnsupported file type."
        print("OCR Text: ", final_text)
    if audio is not None:
        audio_text = process_audio_to_text(audio, inputlanguage=translatefrom)
        final_text += "\n" + audio_text

    final_text_with_producetext = final_text + producetext.format(target_language=translateto)

    response = co.generate(
        model='c4ai-aya',
        prompt=final_text_with_producetext,
        max_tokens=1024,
        temperature=0.5
    )
    generated_text = response.generations[0].text
    print("Generated Text: ", generated_text)

    generated_text_with_format = generated_text + "\n" + formatinputstring
    response = co.generate(
        model='command-nightly',
        prompt=generated_text_with_format,
        max_tokens=4000,
        temperature=0.5
    )
    processed_text = response.generations[0].text

    # The blog post was generated in the target language, so it is spoken
    # from and to that same language.
    audio_output = process_text_to_audio(processed_text, translateto, translateto)

    return processed_text, audio_output
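

# Optional self-test (sketch): exercises the text-only path end to end.
# AYATONIC_SELFTEST is a hypothetical opt-in flag; the Cohere and SeamlessM4T
# calls above hit remote, billed services, so this stays off by default.
if os.getenv("AYATONIC_SELFTEST"):
    demo_text, demo_audio = process_input(text="Bonjour tout le monde",
                                          translatefrom="French", translateto="English")
    print(demo_text)
    print(extract_color_tagged_phrases(demo_text))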


def main():
    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)

        with gr.Row():
            input_language = gr.Dropdown(choices=df["name"].to_list(), label="Your Native Language")
            target_language = gr.Dropdown(choices=df["name"].to_list(), label="Language To Learn")

        with gr.Accordion("Talk To 🌟AyaTonic"):
            with gr.Tab("🤙🏻Audio & Text"):
                audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Mic Input")
                text_input = gr.Textbox(lines=2, label="Text Input")
            with gr.Tab("📸Image & File"):
                image_input = gr.Image(type="pil", label="Camera Input")
                file_input = gr.File(label="File Upload")

        process_button = gr.Button("🌟AyaTonic")

        processed_text_output = RichTextbox(label="Processed Text")
        audio_output = gr.Audio(label="Audio Output")

        process_button.click(
            fn=process_input,
            inputs=[image_input, file_input, audio_input, text_input, input_language, target_language],
            outputs=[processed_text_output, audio_output]
        )

    # Build, then actually serve the app; without launch() nothing is started.
    demo.launch()


if __name__ == "__main__":
    main()