import gradio as gr
import yaml
from joeynmt.prediction import load_params_for_prediction, translate
from huggingface_hub import hf_hub_download



# Mapping from display names to the language codes used in the benchmark repo.
language_map = {'English': 'en', 'Swahili': 'sw', 'Fon': 'fon', 'Igbo': 'ig',
                'Arabic': 'ar', 'Shona': 'sn', 'Ẹ̀dó': 'bin', 'Hausa': 'ha',
                'Efik': 'efi', 'Twi': 'twi', 'Afrikaans': 'af', 'Yoruba': 'yo'}

# Display names shown in the interface dropdowns.
available_languages = list(language_map.keys())

# Language pairs for which benchmark models are currently available.
available_language_pairs = ['en-sw', 'en-af', 'en-ar', 'efi-en', 'en-ha', 'en-ig',
                            'en-fon', 'en-twi', 'sn-en', 'sw-en', 'yo-en']


def load_config(path="configs/default.yaml") -> dict:
    """
    CODE ADAPTED FROM: https://github.com/joeynmt/joeynmt
    Loads and parses a YAML configuration file.

    :param path: path to YAML configuration file
    :return: configuration dictionary
    """
    with open(path, 'r', encoding="utf-8") as ymlfile:
        cfg = yaml.safe_load(ymlfile)
    return cfg
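
# Illustrative sketch (not the actual file) of the JoeyNMT config layout this
# script relies on; only the 'data' section is touched below, where the
# src_vocab / trg_vocab entries are overwritten with locally downloaded paths:
#
#   data:
#       src: "en"
#       trg: "sw"
#       src_vocab: "src_vocab.txt"
#       trg_vocab: "trg_vocab.txt"
#   model:
#       ...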
  
def load_model(source_language, target_language):
    """Download the config, vocabularies and best checkpoint for a language
    pair from the Hub and load the JoeyNMT parameters for prediction."""
    translation_dir = 'main'
    repo_id = "chrisjay/masakhane_benchmarks"
    pair_dir = f"{source_language}-{target_language}/{translation_dir}"

    try:
        file_yaml = hf_hub_download(repo_id, filename=f"{pair_dir}/config.yaml",
                                    force_filename='config.yaml')
        src_vocab = hf_hub_download(repo_id, filename=f"{pair_dir}/src_vocab.txt")
        trg_vocab = hf_hub_download(repo_id, filename=f"{pair_dir}/trg_vocab.txt")
        best_ckpt = hf_hub_download(repo_id, filename=f"{pair_dir}/best.ckpt")
    except Exception:
        raise Exception(f'It seems we do not have a working configuration repo yet for {source_language} -> {target_language}.\n'
                        'You could help us by creating it here: https://huggingface.co/chrisjay/masakhane_benchmarks/tree/main')

    # Point the config at the locally downloaded vocabulary files.
    parsed_yaml_file = load_config(file_yaml)
    parsed_yaml_file['data']['src_vocab'] = src_vocab
    parsed_yaml_file['data']['trg_vocab'] = trg_vocab

    params = load_params_for_prediction(parsed_yaml_file, best_ckpt)
    return params
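
# Example (assumes the en-sw checkpoint actually exists in the Hub repo):
#   params = load_model('en', 'sw')  # JoeyNMT prediction params for English -> Swahili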

# Load the models for all available language pairs up front.
examples_available_models = []
model_mapping = {}
# Reverse lookup (get display name by code), cf. https://stackoverflow.com/questions/8023306/get-key-by-value-in-dictionary
code_to_name = {code: name for name, code in language_map.items()}
for available_lang in available_language_pairs:
    src_code, trg_code = available_lang.split('-')
    try:
        model_mapping[available_lang] = load_model(src_code, trg_code)
        examples_available_models.append([code_to_name[src_code], code_to_name[trg_code]])
    except Exception:
        continue

if not examples_available_models:
    raise Exception('Available models for the Space cannot be empty!')


def get_translation(source_language, target_language, source_sentence=None, source_file=None):
    '''
    Takes a sentence (or an uploaded text file) and returns its translation.
    type_=2 tells joeynmt's translate that it should expect a sentence;
    type_=1 tells it to expect a file path.
    '''
    source_language_ = language_map[source_language]
    target_language_ = language_map[target_language]

    source = source_sentence
    type_ = 2
    if source_file is not None:
        type_ = 1
        source = source_file.name
    try:
        # Use the model pre-loaded at startup rather than downloading it per request.
        params = model_mapping[f'{source_language_}-{target_language_}']
        pred = translate(params, source, type_)
    except Exception:
        return f'There was an issue loading the translation model for {source_language} -> {target_language}. Please try another pair.'

    return pred[0] if source_file is None else pred
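
# Example call (display names, not codes; assumes the en-sw model was loaded at startup):
#   get_translation('English', 'Swahili', source_sentence='How are you?')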
 



title = "Interact with Masakhane Benchmark Models"
description = "This enables you to interact with some of the Masakhane Benchmark Models and keep up with their improvement. Some of these models undergo finetuning on a regular basis. This way, you can easily use the best model with no hassles."

iface = gr.Interface(
    fn=get_translation,
    inputs=[gr.inputs.Dropdown(choices=available_languages, default='English'),
            gr.inputs.Dropdown(choices=available_languages, default='Swahili'),
            gr.inputs.Textbox(label="Input"),
            gr.inputs.File(file_count="single", type="file",
                           label='Or upload a txt file containing sentences', optional=True)],
    outputs=gr.outputs.Textbox(type="auto", label='Translation'),
    title=title,
    description=description,
    examples=examples_available_models,
    enable_queue=True,
    theme='huggingface')
iface.launch()