|
import os

import gradio as gr
from transformers import AutoTokenizer
|
|
|
|
|
def tokenize(input_text):
    """Tokenize *input_text* with the gpt2-dolly tokenizer and report the count.

    Args:
        input_text: Raw text entered by the user.

    Returns:
        A human-readable string such as ``"Number of tokens: 42"``.
    """
    # Fix: the original referenced a module-level `tokenizer` that was never
    # defined, so the first call raised NameError. Build it here instead,
    # using the same model id the rest of the file uses.
    # NOTE: from_pretrained caches downloaded files, but re-instantiating the
    # tokenizer on every call (live=True fires per keystroke) is still wasteful;
    # acceptable for a demo, hoist to module scope for production.
    tokenizer = AutoTokenizer.from_pretrained("lgaalves/gpt2-dolly")
    tokens = tokenizer(input_text)["input_ids"]
    return f"Number of tokens: {len(tokens)}"
|
|
|
# Gradio UI wiring: a live interface that re-runs `tokenize` as the user types.
input_box = gr.Textbox(lines=7, label="Input Text")
output_box = gr.Textbox(label="Tokenization Result")

tokenize_tool = gr.Interface(
    fn=tokenize,
    inputs=input_box,
    outputs=output_box,
    live=True,
    title="GPT-2 Tokenizer",
    description="This tool tokenizes input text using the lgaalves/gpt2-dolly model.",
)

# Starts the local web server; blocks until it is shut down, so anything
# below this line only runs afterwards.
tokenize_tool.launch()
|
|
|
import os |
|
from transformers import pipeline |
|
from transformers import Tool |
|
|
|
class TokenCounterTool(Tool):
    """Transformers-agents tool that counts the tokens a prompt consumes."""

    # NOTE(review): the name says "text_generator" but the tool counts tokens;
    # kept unchanged because agents may already reference this identifier.
    name = "text_generator"
    # Fix: the original description wrongly claimed the tool "returns the
    # generated text"; it returns a token count.
    description = (
        "This is a tool for counting tokens used by a prompt. "
        "It takes a prompt as input and returns the number of tokens."
    )
    inputs = ["text"]
    outputs = ["text"]

    def __call__(self, prompt: str) -> str:
        """Tokenize *prompt* with the gpt2-dolly tokenizer and return the count.

        Args:
            prompt: The text whose token count is wanted.

        Returns:
            A human-readable string such as ``"Number of tokens: 42"``.
        """
        # Fix: the original read os.environ['hf'] (KeyError when unset) and then
        # discarded it. Read it tolerantly and pass it through as the Hub auth
        # token, which is presumably what was intended; None means anonymous
        # access, which works for public models.
        token = os.environ.get("hf")
        tokenizer = AutoTokenizer.from_pretrained("lgaalves/gpt2-dolly", token=token)
        # Fix: the original referenced the undefined name `input_text`; use the
        # `prompt` parameter.
        tokens = tokenizer(prompt)["input_ids"]
        return f"Number of tokens: {len(tokens)}"
|
|