M-AI2 committed on
Commit
1e820d2
·
verified ·
1 Parent(s): c21e556

Upload 2 files

Browse files
Files changed (2) hide show
  1. README.md +21 -6
  2. pyai.py +131 -0
README.md CHANGED
@@ -1,6 +1,21 @@
1
- ---
2
- license: mit
3
- ---
4
-
5
- # PyAI
6
- Open-Source Python AI Model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PyAI
2
+ This open-source library includes RNN, KNN, and ReLU algorithms. In addition to these algorithms, PyAI offers simple NLP helpers and a way to convert audio into text through *Whisper*.
3
+
4
+ Clone: `gh repo clone MitchellShibilski-Unkel/PyAI`
5
+
6
+ # Algorithms
7
+ - RNN
8
+ - KNN
9
+ - ReLU
10
+
11
+ # NLP
12
+ Able To Do:
13
+ - Gets sentence tense
14
+ - Gets each word and sentence
15
+ - Able to change the type of tokenization `(letters, words, or sentences)`
16
+
17
+ # Audio
18
+ Able To Do:
19
+ - Convert audio to text
20
+ - Get the language used in the audio
21
+ - Translate one language to another
pyai.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import whisper
3
+ from torch import nn
4
+ from torch import Tensor
5
+
6
+
7
class PyAI:
    """Hold the library-wide device preference.

    Args:
        useGPU: ``True`` to request GPU execution, ``False`` for CPU.
            Defaults to ``False`` so the class can be built without arguments.
    """

    def __init__(self, useGPU: bool = False):
        # Stored as given; other code reads this flag as ``self.GPU``.
        self.GPU = useGPU
10
+
11
class Algorithms:
    """Small numeric helpers: distance ranking, one recurrent step, and ReLU.

    Args:
        useGPU: ``True`` to run :meth:`ReLU` on CUDA when it is available.
            Defaults to ``False`` so ``Algorithms()`` keeps working.
    """

    def __init__(self, useGPU: bool = False):
        # ReLU reads ``self.GPU``; make sure every instance defines it.
        self.GPU = useGPU

    @staticmethod
    def KNN(x, y, returnValues=0):
        """Rank the distances between paired entries of ``x`` and ``y``.

        Each pair ``(x[i], y[i])`` contributes one distance: the absolute
        difference for scalars, the Euclidean norm of the difference for
        vectors. Pairing stops at the shorter of the two inputs.

        Args:
            x: Iterable of numbers or equal-shaped numeric sequences.
            y: Iterable paired element-by-element with ``x``.
            returnValues: ``0`` returns only the smallest distance; a positive
                ``k`` returns the ``k`` smallest distances in ascending order.

        Returns:
            A ``float`` when ``returnValues == 0``, otherwise a list of floats
            (shorter than ``k`` if fewer pairs exist).

        Raises:
            ValueError: If ``returnValues`` is negative, or if the smallest
                distance is requested but there are no pairs.
        """
        if returnValues < 0:
            raise ValueError("returnValues must be >= 0")

        distances = sorted(
            float(np.linalg.norm(np.atleast_1d(np.subtract(a, b))))
            for a, b in zip(x, y)
        )

        if returnValues == 0:
            if not distances:
                raise ValueError("KNN needs at least one (x, y) pair")
            return distances[0]
        # Slice to exactly k items (the previous ``k - 1`` bound dropped one).
        return distances[:returnValues]

    @staticmethod
    def RNN(w, u, b, x):
        """Run a single recurrent step with sigmoid activations.

        Computes ``h = sigmoid(w * x + u * y_prev + b)`` followed by
        ``y = sigmoid(w * h + b)`` with ``y_prev`` fixed at ``0``.

        NOTE(review): the earlier expression was not executable (it called the
        integer ``1`` and raised ``0`` to the power ``-1``). Sigmoid activation
        and a "previous output" term are the reading adopted here -- confirm
        against the intended model.

        Args:
            w: Input/hidden weight (scalar or array; products are elementwise).
            u: Recurrent weight applied to the previous output.
            b: Bias added in both stages.
            x: Input value(s).

        Returns:
            The output activation ``y`` as a NumPy scalar or array.
        """
        w, u, b, x = (np.asarray(v, dtype=float) for v in (w, u, b, x))

        def _sigmoid(z):
            return 1.0 / (1.0 + np.exp(-z))

        previous_output = 0.0  # no earlier time step is available
        hidden = _sigmoid(w * x + u * previous_output + b)
        return _sigmoid(w * hidden + b)

    def ReLU(self, x: list, *y: list, **u: list):
        """Apply ReLU to every entry of ``x`` and to any extra inputs.

        Args:
            x: Iterable whose items (numbers or numeric sequences) are each
                converted to a float32 tensor and rectified.
            *y: Extra positional inputs; each one becomes one tensor.
            **u: Extra keyword inputs; each *value* becomes one tensor, in
                keyword order.

        Returns:
            ``newX`` alone when no extras are given; ``(newX, newY)`` when only
            positional extras are given; ``(newX, newY, newU)`` when keyword
            extras are given. Each element is a list of tensors.
        """
        # Local import: the module only pulls ``nn`` and ``Tensor`` from torch.
        import torch

        # Fall back to CPU when CUDA was requested but is not present.
        device = "cuda" if self.GPU and torch.cuda.is_available() else "cpu"
        relu = nn.ReLU()

        def _activate(values):
            # ReLU has no parameters, so the *data* must be moved to the device.
            tensor = torch.as_tensor(values, dtype=torch.float32).to(device)
            return relu(tensor)

        newX = [_activate(item) for item in x]
        newY = [_activate(item) for item in y]
        newU = [_activate(item) for item in u.values()]

        # Lists are never ``None``; test for emptiness to pick the return shape.
        if newU:
            return newX, newY, newU
        if newY:
            return newX, newY
        return newX
52
+
53
class Audio:
    """Transcribe an audio file and detect its language with Whisper.

    Args:
        audio: Path of the audio file to process.
    """

    def __init__(self, audio: str):
        self.model = whisper.load_model("base")
        self.audio = audio
        # Log-Mel spectrogram of ``audio``; filled in on first use.
        self.mel = None

    def _computeMel(self):
        """Load the audio, fit it to Whisper's window, and cache its spectrogram."""
        samples = whisper.pad_or_trim(whisper.load_audio(self.audio))
        self.mel = whisper.log_mel_spectrogram(samples).to(self.model.device)
        return self.mel

    def generateTextFromAudio(self) -> str:
        """Decode the audio file into text.

        The spectrogram is recomputed on every call so a changed ``self.audio``
        is picked up.

        Returns:
            The decoded text.
        """
        mel = self._computeMel()

        # Half precision is only requested off-CPU; CPU decoding uses fp32.
        options = whisper.DecodingOptions(fp16=self.model.device.type != "cpu")
        result = whisper.decode(self.model, mel, options)

        return result.text

    def translateText(self, text: str, dataSet: str) -> str:
        """Run ``str.translate`` on ``text`` using the contents of ``dataSet``.

        NOTE(review): the file's raw text is passed as the translation table,
        so a character with code point ``n`` is replaced by the ``n``-th
        character of the file (and left alone when the file is shorter). This
        is unlikely to be a language translation -- confirm the intended
        data-set format before relying on it.

        Args:
            text: Text to transform.
            dataSet: Path of the file whose contents act as the table.

        Returns:
            The transformed text.
        """
        with open(dataSet, "r", encoding="utf-8") as handle:
            table = handle.read()

        return text.translate(table)

    def getLang(self):
        """Return the most probable language code for the audio.

        Computes the spectrogram first if it has not been computed yet, so
        this no longer depends on ``generateTextFromAudio`` being called first.
        """
        mel = getattr(self, "mel", None)
        if mel is None:
            mel = self._computeMel()

        _, probabilities = self.model.detect_language(mel)
        return max(probabilities, key=probabilities.get)
82
+
83
class NLP:
    """Minimal text helper: tokenisation plus a keyword-based tense guess.

    Args:
        text: The text to analyse.
    """

    # Characters removed from word edges before tense-marker matching.
    _PUNCTUATION = ".,!?;:\"'()[]"

    def __init__(self, text: str):
        self.text = text
        # Raw splits, kept exactly as produced by ``str.split``.
        self.sentences = text.split(".")
        self.words = text.split(" ")
        self._past = ["was", "had", "did"]
        self._present = ["is", "has"]
        self._future = ["will", "shall"]
        # Defined up front so ``getTokens`` is safe before ``setTokensTo``.
        self.tokens = []

    def setTokensTo(self, letters: bool, *words: bool, **sentences: bool):
        """Choose the tokenisation and rebuild ``self.tokens``.

        The first enabled mode wins, in the order letters, words, sentences.

        Args:
            letters: ``True`` to tokenise into single characters.
            *words: Any true positional flag selects word tokens.
            **sentences: Any true keyword flag selects sentence tokens.

        If no mode is enabled, ``self.tokens`` becomes ``["ERROR"]``.
        """
        # ``words`` is a tuple and ``sentences`` a dict, so test their contents:
        # a bare truthiness check would treat an explicit ``False`` as enabled.
        if letters:
            self.tokens = list(self.text)
        elif any(words):
            self.tokens = list(self.words)
        elif any(sentences.values()):
            self.tokens = list(self.sentences)
        else:
            self.tokens = ["ERROR"]

    def getTense(self):
        """Guess the tense from marker words found in the text.

        Words are compared case-insensitively with edge punctuation removed.
        Past markers take priority over present, and present over future.

        Returns:
            ``(past, present, future)`` with exactly one ``True`` entry, or the
            string ``"ERROR - Tense :: Not Enough Data"`` when no marker word
            is present.
        """
        self.past = False
        self.present = False
        self.future = False

        found = {
            word.strip(self._PUNCTUATION).lower() for word in self.text.split()
        }

        if found.intersection(self._past):
            self.past = True
        elif found.intersection(self._present):
            self.present = True
        elif found.intersection(self._future):
            self.future = True
        else:
            return "ERROR - Tense :: Not Enough Data"

        return self.past, self.present, self.future

    def getWords(self):
        """Return the text split on single spaces."""
        return self.words

    def getSentences(self):
        """Return the text split on full stops."""
        return self.sentences

    def getTokens(self):
        """Return the tokens from the last ``setTokensTo`` call (empty before it)."""
        return self.tokens