Spaces:
Running
Running
| # -*- coding: utf-8 -*- | |
| """ Use torchMoji to encode texts into emotional feature vectors. | |
| """ | |
| from __future__ import print_function, division, unicode_literals | |
| import json | |
| from torchmoji.sentence_tokenizer import SentenceTokenizer | |
| from torchmoji.model_def import torchmoji_feature_encoding | |
| from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH | |
# Example inputs: short sentences to be tokenized and encoded below.
TEST_SENTENCES = [
    "I love mom's cooking",
    "I love how you never reply back..",
    "I love cruising with my homies",
    "I love messing with yo mind!!",
    "I love you and now you're just gone..",
    "This is shit",
    "This is the shit",
]

# Passed to SentenceTokenizer as its second argument
# (presumably the maximum tokenized sentence length -- TODO confirm).
maxlen = 30
# NOTE(review): not referenced anywhere in the visible part of this script.
batch_size = 32

# Load the JSON vocabulary and turn the sentences into token ids.
print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
with open(VOCAB_PATH, 'r') as vocab_file:
    vocabulary = json.load(vocab_file)

st = SentenceTokenizer(vocabulary, maxlen)
# tokenize_sentences returns three values; only the first is used here.
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

# Build the feature-encoding model from the pretrained weights file.
print('Loading model from {}.'.format(PRETRAINED_PATH))
model = torchmoji_feature_encoding(PRETRAINED_PATH)
print(model)

# Run the tokenized sentences through the model.
print('Encoding texts..')
encoding = model(tokenized)

# Show a small slice of the result for the first sentence.
print('First 5 dimensions for sentence: {}'.format(TEST_SENTENCES[0]))
first_sentence_head = encoding[0, :5]
print(first_sentence_head)

# From here the encodings can be used however you like, for example:
#   - visualize them to see how the sentences differ,
#   - train a logistic regression classifier on top of them.