Spaces:
Running
Running
| import os | |
| from collections import OrderedDict | |
| os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" | |
| import gradio as gr | |
| from shitsu import ShitsuScorer | |
| from huggingface_hub import hf_hub_download | |
class OptimizedShitsuScorer:
    """Bounded, least-recently-used cache of per-language scorers.

    Every language is served by its own ``ShitsuScorer``.  At most
    ``max_models`` of them are kept at once; asking for a language that is not
    cached evicts the least recently used entry and then loads the new one.

    Args:
        max_models: Maximum number of scorers held at the same time.  Must be
            at least 1.

    Raises:
        ValueError: If ``max_models`` is smaller than 1.
    """

    def __init__(self, max_models=2):
        # A capacity below 1 used to surface only later, as a KeyError raised
        # by popping an empty OrderedDict on the first lookup; reject it here.
        if max_models < 1:
            raise ValueError(f"max_models must be at least 1, got {max_models!r}")
        # Insertion order doubles as recency order: oldest first, newest last.
        self.scorers = OrderedDict()
        self.max_models = max_models
        # Language passed to the most recent get_scorer() call.
        self.current_language = None

    def _load_scorer(self, language):
        """Construct the scorer for ``language`` (the single model-loading point)."""
        return ShitsuScorer(language)

    def get_scorer(self, language):
        """Return the scorer for ``language``, loading it on a cache miss.

        A hit marks the language as most recently used.  A miss evicts least
        recently used entries until there is room, then loads the new scorer.

        Args:
            language: Language code identifying the scorer.

        Returns:
            The cached or newly loaded scorer for ``language``.
        """
        if language in self.scorers:
            self.scorers.move_to_end(language)
        else:
            # Evict before loading so the cache never holds more than
            # max_models scorers, even while the new one is being built.  The
            # emptiness guard keeps popitem() from raising if max_models was
            # lowered below 1 after construction.
            while self.scorers and len(self.scorers) >= self.max_models:
                self.scorers.popitem(last=False)
            self.scorers[language] = self._load_scorer(language)
        self.current_language = language
        return self.scorers[language]

    def score(self, text, language):
        """Score a single text with the scorer for ``language``.

        The scorer is called with a one-element list and the first element of
        its result is returned.
        """
        scorer = self.get_scorer(language)
        return scorer.score([text])[0]

    def get_loaded_languages(self):
        """Return the cached language codes, least recently used first."""
        return list(self.scorers)
# Module-level scorer cache used by get_score(); holds at most two models.
optimized_scorer = OptimizedShitsuScorer(max_models=2)

# Load the English scorer at import time; English is the dropdown's default.
optimized_scorer.get_scorer("en")
# Sample texts offered through gr.Examples for the input box.
example_inputs = [
    (
        "The Beatles were a popular band in the 1960s. "
        "They released many hit songs."
    ),
    "Chocolate is a type of sweet food that people often eat for dessert.",
    (
        "I'm thinking of going to the beach this weekend. "
        "The weather is supposed to be great!"
    ),
    (
        "Quantum mechanics is a fundamental theory in physics that provides a "
        "description of the physical properties of nature at the scale of atoms "
        "and subatomic particles."
    ),
    "Can you believe it's already September? This year is flying by!",
]
def get_score(user_text, language):
    """Score ``user_text`` and build the two values shown in the UI.

    Args:
        user_text: Text to score.
        language: Language code selecting which scorer to use.

    Returns:
        A ``(score_html, loaded_languages)`` tuple.  ``score_html`` is an HTML
        snippet holding the score formatted to four significant digits.
        ``loaded_languages`` is a list of ``(text, label)`` pairs: a heading,
        then one pair per cached language followed by a blank separator pair.
    """
    score = optimized_scorer.score(user_text, language)
    formatted_score = f"{score:.4g}"

    display_loaded_languages = [('Currently loaded languages:', None)]
    for code in optimized_scorer.get_loaded_languages():
        # The dropdown accepts custom values, so a loaded code may be missing
        # from language_map; show the raw code instead of raising KeyError.
        display_loaded_languages.append((language_map.get(code, code), code))
        display_loaded_languages.append((' ', None))
    return f'<div class="nice-box"> Score: {formatted_score}</div>', display_loaded_languages
# Display name for each supported language code.
language_map = {
    "am": "Amharic",
    "ar": "Arabic",
    "bg": "Bulgarian",
    "bn": "Bengali",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fa": "Persian",
    "fi": "Finnish",
    "fr": "French",
    "gu": "Gujarati",
    "ha": "Hausa",
    "hi": "Hindi",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",
    "jv": "Javanese",
    "kn": "Kannada",
    "ko": "Korean",
    "lt": "Lithuanian",
    "mr": "Marathi",
    "nl": "Dutch",
    "no": "Norwegian",
    "yo": "Yoruba",
    "zh": "Chinese",
}

# Dropdown choices: the supported codes, in the same order as language_map.
language_options = list(language_map)
# Highlight colour for each language label in the loaded-languages display.
# Values are CSS colour names or hex codes.  "saffron" is not a CSS colour
# name, so the two saffron entries are spelled as the hex code #F4C430.
color_map = {
    "am": "green",    # Ethiopia's flag has green
    "ar": "black",    # Many Arab flags feature black
    "bg": "white",    # Bulgaria's flag has white
    "bn": "green",    # Bangladesh's flag is green and red
    "cs": "blue",     # Czech Republic's flag has blue
    "da": "red",      # Denmark's flag is red and white
    "de": "black",    # Germany's flag has black
    "el": "blue",     # Greece's flag has blue
    "en": "red",      # UK/US flags have red
    "es": "yellow",   # Spain's flag has yellow
    "fa": "green",    # Iran's flag has green
    "fi": "blue",     # Finland's flag is blue and white
    "fr": "blue",     # France's flag has blue
    "gu": "#F4C430",  # Saffron: India (Gujarat) flag's color
    "ha": "green",    # Nigeria's flag has green
    "hi": "orange",   # India's flag has orange
    "hu": "red",      # Hungary's flag has red
    "id": "red",      # Indonesia's flag is red and white
    "it": "green",    # Italy's flag has green
    "ja": "red",      # Japan's flag has a red sun
    "jv": "brown",    # Associated with traditional Javanese culture
    "kn": "yellow",   # Karnataka (Indian state) flag has yellow
    "ko": "blue",     # South Korea's flag has blue
    "lt": "yellow",   # Lithuania's flag has yellow
    "mr": "#F4C430",  # Saffron: Marathi culture often uses saffron
    "nl": "orange",   # The Netherlands is often associated with orange
    "no": "red",      # Norway's flag is red, white, and blue
    "yo": "green",    # Nigeria's flag for Yoruba-speaking people
    "zh": "red",      # China's flag is red
}
# Custom stylesheet passed to gr.Blocks(css=...).
#   #title     styles the header element created with elem_id="title".
#   .nice-box  styles the <div> that displays the score.
# The other selectors (#gen_btn, #gallery, .card_internal, .styler) are not
# referenced by the code visible in this file.
css = '''
#gen_btn{height: 100%}
#title{text-align: center}
#title h1{font-size: 3em; display:inline-flex; align-items:center}
#title img{width: 100px; margin-right: 0.5em}
#gallery .grid-wrap{height: 10vh}
.card_internal{display: flex;height: 100px;margin-top: .5em}
.card_internal img{margin-right: 1em}
.styler{--form-gap-width: 0px !important}
.nice-box {
border: 2px solid #007bff;
border-radius: 10px;
padding: 15px;
background-color: #f8f9fa;
font-size: 18px;
text-align: center;
min-height: 60px;
display: flex;
align-items: center;
justify-content: center;
}
'''
# Soft theme with a blue primary hue and a sky secondary hue.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="sky",
)

# Page definition: header, description, input row, language row, examples,
# extra information, and the click handler that ties them together.
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation (in particular whether `score` sits inside the Column) —
# confirm against the deployed layout.
with gr.Blocks(theme=theme, css=css) as demo:
    # Header: logo image plus title, targeted by the `#title` CSS rules.
    title = gr.HTML(
        """<h1><img src="https://huggingface.co/spaces/Dusduo/shitsu-text-scorer-demo/resolve/main/shitsu-logo.jpeg" alt="LightBlue"> Shitsu Text Scorer</h1>""",
        elem_id="title",
    )
    # Short description of the model and of the range of its scores.
    gr.Markdown(
        """This is a demo of [Shitsu text scorer](https://huggingface.co/lightblue/shitsu_text_scorer) for multiple languages, which scores text based on the amount of useful, textbook-like information in it.
It outputs a score generally between 0 and 1 but can exceed both of these bounds as it is a regressor.
"""
    )
    # Input row: the text box, then a column with the submit button.
    with gr.Row():
        user_text = gr.Textbox(label='Input text', placeholder='Type something here...')
        with gr.Column(scale=0):
            submit_btn = gr.Button("Submit")
            # Placeholder markup; get_score's first return value replaces it.
            score = gr.HTML(
                value='<div class="nice-box"> Score... </div>',
                label="Output"
            )
    # Language row: the language picker and the list of cached languages.
    with gr.Row():
        # allow_custom_value=True lets the user enter a code that is not in
        # language_options.
        language_choice = gr.Dropdown(
            choices=language_options,
            label="Choose a language",
            info="Type to search",
            value="en",
            allow_custom_value=True,
        )
        # Initial value matches the state after the English scorer has been
        # preloaded at import time; get_score's second return value replaces it.
        loaded_languages = gr.HighlightedText(
            value = [('Currently loaded languages: \n', None), ('English', 'en')],
            label="",
            combine_adjacent=True,
            show_legend=False,  # legend disabled
            color_map=color_map,
            scale=0)
    # Example texts wired to the input text box.
    gr.Examples(examples=example_inputs, inputs=user_text)
    # Footer with links and background on the model.
    gr.Markdown(
        """
---
## 🛈 **Additional Information**
This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.
This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.
This scorer can be used to filter useful information from large text corpora in many languages.
"""
    )
    # On click, call get_score with the text and the chosen language and write
    # its two results to the score box and the loaded-languages display.
    submit_btn.click(get_score, inputs=[user_text, language_choice], outputs=[score, loaded_languages])

# Start the app.
demo.launch()