import gradio as gr
import spaces
from transformers import pipeline
import torch
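
# Each task handler below builds a `transformers` pipeline for the corresponding
# fine-tuned TURNA checkpoint on every call, so the first request per task downloads
# the model weights from the Hugging Face Hub (see the note in DESCRIPTION below).
# `device=0` assumes a CUDA-capable GPU is available to the Space.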
| DESCRIPTION=""" | |
| ### a Turkish encoder-decoder language model | |
| Welcome to our Huggingface space, where you can explore the capabilities of TURNA. | |
| **Key Features of TURNA:** | |
| - **Powerful Architecture:** TURNA contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains. | |
| - **Diverse Training Data:** Our model is trained on a varied dataset of 43 billion tokens, covering a wide array of domains. | |
| - **Broad Applications:** TURNA is fine-tuned for a variety of generation and understanding tasks, including: | |
| - Summarization | |
| - Paraphrasing | |
| - News title generation | |
| - Sentiment classification | |
| - Text categorization | |
| - Named entity recognition | |
| - Part-of-speech tagging | |
| - Semantic textual similarity | |
| - Natural language inference | |
| **Note:** First inference might take time as the models are downloaded on-the-go. | |
| *TURNA can generate toxic content or provide erroneous information. Double-check before usage.* | |
| """ | |
| CITATION = """ | |
| Refer to our [paper](https://arxiv.org/abs/2401.14373) for more details. | |
| ### Citation | |
| ```bibtex | |
| @misc{uludoğan2024turna, | |
| title={TURNA: A Turkish Encoder-Decoder Language Model for Enhanced Understanding and Generation}, | |
| author={Gökçe Uludoğan and Zeynep Yirmibeşoğlu Balal and Furkan Akkurt and Melikşah Türker and Onur Güngör and Susan Üsküdarlı}, | |
| year={2024}, | |
| eprint={2401.14373}, | |
| archivePrefix={arXiv}, | |
| primaryClass={cs.CL} | |
| } | |
| ``` | |
| """ | |

sentiment_example = [["Bu üründen çok memnun kaldım."]]
long_text = [[
    "Eyfel Kulesi (Fransızca: La tour Eiffel [la tuʀ ɛˈfɛl]), Paris'teki demir kule. Kule, aynı zamanda tüm dünyada Fransa'nın sembolü halini almıştır. İsmini, inşa ettiren Fransız inşaat mühendisi Gustave Eiffel'den alır.[1] En büyük turizm cazibelerinden biri olan Eyfel Kulesi, yılda 6 milyon turist çeker. 2002 yılında toplam ziyaretçi sayısı 200 milyona ulaşmıştır.",
    "Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. Peki grip virüsü ne yapıyor da kalp krizine yol açıyor? Karpuz şöyle açıkladı: Grip virüsü kanın yapışkanlığını veya pıhtılaşmasını artırıyor.",
]]
ner_example = [["Benim adım Turna."]]
t2t_example = [["Paraphrase: Bu üründen çok memnun kaldım."]]
nli_example = [["Bunu çok beğendim. Bunu çok sevdim."]]
text_category_example = [[" anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor"]]
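
# Each inner list above is one gr.Examples row; its values are matched positionally
# against the `inputs` components passed to the corresponding gr.Examples call below.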

def nli(input, model_choice="turna_nli_nli_tr"):
    if model_choice == "turna_nli_nli_tr":
        nli_model = pipeline(model="boun-tabi-LMG/turna_nli_nli_tr", device=0)
        return nli_model(input)[0]["generated_text"]
    else:
        stsb_model = pipeline(model="boun-tabi-LMG/turna_semantic_similarity_stsb_tr", device=0)
        return stsb_model(input)[0]["generated_text"]

def sentiment_analysis(input, model_choice="turna_classification_17bintweet_sentiment"):
    if model_choice == "turna_classification_17bintweet_sentiment":
        sentiment_model = pipeline(model="boun-tabi-LMG/turna_classification_17bintweet_sentiment", device=0)
        return sentiment_model(input)[0]["generated_text"]
    else:
        product_reviews = pipeline(model="boun-tabi-LMG/turna_classification_tr_product_reviews", device=0)
        return product_reviews(input)[0]["generated_text"]

def pos(input, model_choice="turna_pos_imst"):
    if model_choice == "turna_pos_imst":
        pos_imst = pipeline(model="boun-tabi-LMG/turna_pos_imst", device=0)
        return pos_imst(input)[0]["generated_text"]
    else:
        pos_boun = pipeline(model="boun-tabi-LMG/turna_pos_boun", device=0)
        return pos_boun(input)[0]["generated_text"]

def ner(input, model_choice="turna_ner_wikiann"):
    if model_choice == "turna_ner_wikiann":
        ner_wikiann = pipeline(model="boun-tabi-LMG/turna_ner_wikiann", device=0)
        return ner_wikiann(input)[0]["generated_text"]
    else:
        ner_model = pipeline(model="boun-tabi-LMG/turna_ner_milliyet", device=0)
        return ner_model(input)[0]["generated_text"]

def paraphrase(input, model_choice="turna_paraphrasing_tatoeba"):
    if model_choice == "turna_paraphrasing_tatoeba":
        paraphrasing = pipeline(model="boun-tabi-LMG/turna_paraphrasing_tatoeba", device=0)
        return paraphrasing(input)[0]["generated_text"]
    else:
        paraphrasing_sub = pipeline(model="boun-tabi-LMG/turna_paraphrasing_opensubtitles", device=0)
        return paraphrasing_sub(input)[0]["generated_text"]

def summarize(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
    model_mapping = {
        "turna_summarization_tr_news": "boun-tabi-LMG/turna_summarization_tr_news",
        "turna_summarization_mlsum": "boun-tabi-LMG/turna_summarization_mlsum",
    }
    summarization_model = pipeline(model=model_mapping[model_choice], device=0)
    # The extra keyword arguments are forwarded to the underlying generate() call.
    return summarization_model(input, max_new_tokens=max_new_tokens, length_penalty=length_penalty,
                               no_repeat_ngram_size=no_repeat_ngram_size)[0]["generated_text"]

def categorize(input):
    ttc = pipeline(model="boun-tabi-LMG/turna_classification_ttc4900", device=0)
    return ttc(input)[0]["generated_text"]

def turna(input, max_new_tokens, length_penalty,
          top_k, top_p, temp, num_beams,
          do_sample, no_repeat_ngram_size, repetition_penalty):
    # Use a distinct name for the pipeline to avoid shadowing this function.
    turna_model = pipeline(model="boun-tabi-LMG/TURNA", device=0)
    # The [S2S] prefix selects the sequence-to-sequence denoising mode from the
    # UL2-style pre-training, and <EOS> marks the end of the prompt.
    input = f"[S2S] {input}<EOS>"
    return turna_model(input, max_new_tokens=max_new_tokens, length_penalty=length_penalty,
                       top_k=top_k, top_p=top_p, temperature=temp, num_beams=num_beams,
                       do_sample=do_sample, no_repeat_ngram_size=no_repeat_ngram_size,
                       repetition_penalty=repetition_penalty)[0]["generated_text"]

with gr.Blocks(theme="abidlabs/Lime") as demo:
    gr.Markdown("# TURNA")
    gr.Image("images/turna-logo.png", width=100, show_label=False, show_download_button=False, show_share_button=False)
    gr.Markdown(DESCRIPTION)
| with gr.Tab("Sentiment Analysis"): | |
| gr.Markdown("TURNA fine-tuned on sentiment analysis. Enter text to analyse sentiment and pick the model (tweets or product reviews).") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| sentiment_choice = gr.Radio(choices = ["turna_classification_17bintweet_sentiment", "turna_classification_tr_product_reviews"], label ="Model", value="turna_classification_17bintweet_sentiment") | |
| sentiment_input = gr.Textbox(label="Sentiment Analysis Input") | |
| sentiment_submit = gr.Button() | |
| sentiment_output = gr.Textbox(label="Sentiment Analysis Output") | |
| sentiment_submit.click(sentiment_analysis, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output) | |
| sentiment_examples = gr.Examples(examples = sentiment_example, inputs = [sentiment_input, sentiment_choice], outputs=sentiment_output, fn=sentiment_analysis) | |
| with gr.Tab("TURNA 🐦"): | |
| gr.Markdown("Pre-trained TURNA. Enter text to start generating.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| with gr.Accordion("Advanced Generation Parameters"): | |
| max_new_tokens = gr.Slider(label = "Maximum length", | |
| minimum = 0, | |
| maximum = 512, | |
| value = 128) | |
| length_penalty = gr.Slider(label = "Length penalty", | |
| value=1.0) | |
| top_k = gr.Slider(label = "Top-k", value=10) | |
| top_p = gr.Slider(label = "Top-p", value=0.95) | |
| temp = gr.Slider(label = "Temperature", value=1.0, minimum=0.1, maximum=100.0) | |
| no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,) | |
| repetition_penalty = gr.Slider(label = "Repetition Penalty", minimum=0.0, value=3.1, step=0.1) | |
| num_beams = gr.Slider(label = "Number of beams", minimum=1, | |
| maximum=10, value=3) | |
| do_sample = gr.Radio(choices = [True, False], value = True, label = "Sampling") | |
| with gr.Column(): | |
| text_gen_input = gr.Textbox(label="Text Generation Input") | |
| text_gen_submit = gr.Button() | |
| text_gen_output = gr.Textbox(label="Text Generation Output") | |
| text_gen_submit.click(turna, inputs=[text_gen_input, max_new_tokens, length_penalty, | |
| top_k, top_p, temp, num_beams, | |
| do_sample, no_repeat_ngram_size, repetition_penalty], outputs=text_gen_output) | |
| text_gen_example = [["Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna"]] | |
| text_gen_examples = gr.Examples(examples = text_gen_example, inputs = [text_gen_input, max_new_tokens, length_penalty, | |
| top_k, top_p, temp, num_beams, do_sample, no_repeat_ngram_size, repetition_penalty], outputs=text_gen_output, fn=turna) | |
| with gr.Tab("Text Categorization"): | |
| gr.Markdown("TURNA fine-tuned on text categorization. Enter text to categorize text or try the example.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| text_input = gr.Textbox(label="Text Categorization Input") | |
| text_submit = gr.Button() | |
| text_output = gr.Textbox(label="Text Categorization Output") | |
| text_submit.click(categorize, inputs=[text_input], outputs=text_output) | |
| text_examples = gr.Examples(examples = text_category_example,inputs=[text_input], outputs=text_output, fn=categorize) | |
| with gr.Tab("NLI"): | |
| gr.Markdown("TURNA fine-tuned on natural language inference. Enter text to infer entailment and pick the model. You can also check for semantic similarity entailment.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| nli_choice = gr.Radio(choices = ["turna_nli_nli_tr", "turna_semantic_similarity_stsb_tr"], label ="Model", value="turna_nli_nli_tr") | |
| nli_input = gr.Textbox(label="NLI Input") | |
| nli_submit = gr.Button() | |
| nli_output = gr.Textbox(label="NLI Output") | |
| nli_submit.click(nli, inputs=[nli_input, nli_choice], outputs=nli_output) | |
| nli_examples = gr.Examples(examples = nli_example, inputs = [nli_input, nli_choice], outputs=nli_output, fn=nli) | |
| with gr.Tab("POS"): | |
| gr.Markdown("TURNA fine-tuned on part-of-speech-tagging. Enter text to parse parts of speech and pick the model.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| pos_choice = gr.Radio(choices = ["turna_pos_imst", "turna_pos_boun"], label ="Model", value="turna_pos_imst") | |
| pos_input = gr.Textbox(label="POS Input") | |
| pos_submit = gr.Button() | |
| pos_output = gr.Textbox(label="POS Output") | |
| pos_submit.click(pos, inputs=[pos_input, pos_choice], outputs=pos_output) | |
| pos_examples = gr.Examples(examples = ner_example, inputs = [pos_input, pos_choice], outputs=pos_output, fn=pos) | |
| with gr.Tab("NER"): | |
| gr.Markdown("TURNA fine-tuned on named entity recognition. Enter text to parse named entities and pick the model.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| ner_choice = gr.Radio(choices = ["turna_ner_wikiann", "turna_ner_milliyet"], label ="Model", value="turna_ner_wikiann") | |
| ner_input = gr.Textbox(label="NER Input") | |
| ner_submit = gr.Button() | |
| ner_output = gr.Textbox(label="NER Output") | |
| ner_submit.click(ner, inputs=[ner_input, ner_choice], outputs=ner_output) | |
| ner_examples = gr.Examples(examples = ner_example, inputs = [ner_input, ner_choice], outputs=ner_output, fn=ner) | |
| with gr.Tab("Paraphrase"): | |
| gr.Markdown("TURNA fine-tuned on paraphrasing. Enter text to paraphrase and pick the model.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| paraphrasing_choice = gr.Radio(choices = ["turna_paraphrasing_tatoeba", "turna_paraphrasing_opensubtitles"], label ="Model", value="turna_paraphrasing_tatoeba") | |
| paraphrasing_input = gr.Textbox(label = "Paraphrasing Input") | |
| paraphrasing_submit = gr.Button() | |
| paraphrasing_output = gr.Text(label="Paraphrasing Output") | |
| paraphrasing_submit.click(paraphrase, inputs=[paraphrasing_input, paraphrasing_choice], outputs=paraphrasing_output) | |
| paraphrase_examples = gr.Examples(examples = long_text, inputs = [paraphrasing_input, paraphrasing_choice], outputs=paraphrasing_output, fn=paraphrase) | |
| with gr.Tab("Summarization"): | |
| gr.Markdown("TURNA fine-tuned on summarization. Enter text to summarize and pick the model.") | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| sum_choice = gr.Radio(choices = ["turna_summarization_mlsum", "turna_summarization_tr_news"], label ="Model", value="turna_summarization_mlsum") | |
| with gr.Accordion("Advanced Generation Parameters"): | |
| max_new_tokens = gr.Slider(label = "Maximum length", | |
| minimum = 0, | |
| maximum = 512, | |
| value = 128) | |
| length_penalty = gr.Slider(label = "Length penalty", | |
| minimum = -10, | |
| maximum = 10, | |
| value=2.0) | |
| no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,) | |
| with gr.Column(): | |
| sum_input = gr.Textbox(label = "Summarization Input") | |
| sum_submit = gr.Button() | |
| sum_output = gr.Textbox(label = "Summarization Output") | |
| sum_submit.click(summarize, inputs=[sum_input, sum_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=sum_output) | |
| sum_examples = gr.Examples(examples = long_text, inputs = [sum_input, sum_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=sum_output, fn=summarize) | |
    gr.Markdown(CITATION)

demo.launch()