summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ru_RU-irina-medium.onnxbin0 -> 63201294 bytes
-rw-r--r--ru_RU-irina-medium.onnx.json483
-rwxr-xr-xvoice-to-text15
-rw-r--r--vosk-model-small-ru-0.22/README8
-rw-r--r--vosk-model-small-ru-0.22/am/final.mdlbin0 -> 15862383 bytes
-rw-r--r--vosk-model-small-ru-0.22/conf/mfcc.conf7
-rw-r--r--vosk-model-small-ru-0.22/conf/model.conf10
-rw-r--r--vosk-model-small-ru-0.22/graph/Gr.fstbin0 -> 32288259 bytes
-rw-r--r--vosk-model-small-ru-0.22/graph/HCLr.fstbin0 -> 32993810 bytes
-rw-r--r--vosk-model-small-ru-0.22/graph/disambig_tid.int5
-rw-r--r--vosk-model-small-ru-0.22/graph/phones/word_boundary.int202
-rw-r--r--vosk-model-small-ru-0.22/ivector/final.dubmbin0 -> 168048 bytes
-rw-r--r--vosk-model-small-ru-0.22/ivector/final.iebin0 -> 9927287 bytes
-rw-r--r--vosk-model-small-ru-0.22/ivector/final.matbin0 -> 44975 bytes
-rw-r--r--vosk-model-small-ru-0.22/ivector/global_cmvn.stats3
-rw-r--r--vosk-model-small-ru-0.22/ivector/online_cmvn.conf1
-rw-r--r--vosk-model-small-ru-0.22/ivector/splice.conf2
17 files changed, 736 insertions, 0 deletions
diff --git a/ru_RU-irina-medium.onnx b/ru_RU-irina-medium.onnx
new file mode 100644
index 0000000..bff2b02
--- /dev/null
+++ b/ru_RU-irina-medium.onnx
Binary files differ
diff --git a/ru_RU-irina-medium.onnx.json b/ru_RU-irina-medium.onnx.json
new file mode 100644
index 0000000..7b61766
--- /dev/null
+++ b/ru_RU-irina-medium.onnx.json
@@ -0,0 +1,483 @@
+{
+ "audio": {
+ "sample_rate": 22050,
+ "quality": "medium"
+ },
+ "espeak": {
+ "voice": "ru"
+ },
+ "inference": {
+ "noise_scale": 0.667,
+ "length_scale": 1,
+ "noise_w": 0.8
+ },
+ "phoneme_map": {},
+ "phoneme_id_map": {
+ "_": [
+ 0
+ ],
+ "^": [
+ 1
+ ],
+ "$": [
+ 2
+ ],
+ " ": [
+ 3
+ ],
+ "!": [
+ 4
+ ],
+ "'": [
+ 5
+ ],
+ "(": [
+ 6
+ ],
+ ")": [
+ 7
+ ],
+ ",": [
+ 8
+ ],
+ "-": [
+ 9
+ ],
+ ".": [
+ 10
+ ],
+ ":": [
+ 11
+ ],
+ ";": [
+ 12
+ ],
+ "?": [
+ 13
+ ],
+ "a": [
+ 14
+ ],
+ "b": [
+ 15
+ ],
+ "c": [
+ 16
+ ],
+ "d": [
+ 17
+ ],
+ "e": [
+ 18
+ ],
+ "f": [
+ 19
+ ],
+ "h": [
+ 20
+ ],
+ "i": [
+ 21
+ ],
+ "j": [
+ 22
+ ],
+ "k": [
+ 23
+ ],
+ "l": [
+ 24
+ ],
+ "m": [
+ 25
+ ],
+ "n": [
+ 26
+ ],
+ "o": [
+ 27
+ ],
+ "p": [
+ 28
+ ],
+ "q": [
+ 29
+ ],
+ "r": [
+ 30
+ ],
+ "s": [
+ 31
+ ],
+ "t": [
+ 32
+ ],
+ "u": [
+ 33
+ ],
+ "v": [
+ 34
+ ],
+ "w": [
+ 35
+ ],
+ "x": [
+ 36
+ ],
+ "y": [
+ 37
+ ],
+ "z": [
+ 38
+ ],
+ "æ": [
+ 39
+ ],
+ "ç": [
+ 40
+ ],
+ "ð": [
+ 41
+ ],
+ "ø": [
+ 42
+ ],
+ "ħ": [
+ 43
+ ],
+ "ŋ": [
+ 44
+ ],
+ "œ": [
+ 45
+ ],
+ "ǀ": [
+ 46
+ ],
+ "ǁ": [
+ 47
+ ],
+ "ǂ": [
+ 48
+ ],
+ "ǃ": [
+ 49
+ ],
+ "ɐ": [
+ 50
+ ],
+ "ɑ": [
+ 51
+ ],
+ "ɒ": [
+ 52
+ ],
+ "ɓ": [
+ 53
+ ],
+ "ɔ": [
+ 54
+ ],
+ "ɕ": [
+ 55
+ ],
+ "ɖ": [
+ 56
+ ],
+ "ɗ": [
+ 57
+ ],
+ "ɘ": [
+ 58
+ ],
+ "ə": [
+ 59
+ ],
+ "ɚ": [
+ 60
+ ],
+ "ɛ": [
+ 61
+ ],
+ "ɜ": [
+ 62
+ ],
+ "ɞ": [
+ 63
+ ],
+ "ɟ": [
+ 64
+ ],
+ "ɠ": [
+ 65
+ ],
+ "ɡ": [
+ 66
+ ],
+ "ɢ": [
+ 67
+ ],
+ "ɣ": [
+ 68
+ ],
+ "ɤ": [
+ 69
+ ],
+ "ɥ": [
+ 70
+ ],
+ "ɦ": [
+ 71
+ ],
+ "ɧ": [
+ 72
+ ],
+ "ɨ": [
+ 73
+ ],
+ "ɪ": [
+ 74
+ ],
+ "ɫ": [
+ 75
+ ],
+ "ɬ": [
+ 76
+ ],
+ "ɭ": [
+ 77
+ ],
+ "ɮ": [
+ 78
+ ],
+ "ɯ": [
+ 79
+ ],
+ "ɰ": [
+ 80
+ ],
+ "ɱ": [
+ 81
+ ],
+ "ɲ": [
+ 82
+ ],
+ "ɳ": [
+ 83
+ ],
+ "ɴ": [
+ 84
+ ],
+ "ɵ": [
+ 85
+ ],
+ "ɶ": [
+ 86
+ ],
+ "ɸ": [
+ 87
+ ],
+ "ɹ": [
+ 88
+ ],
+ "ɺ": [
+ 89
+ ],
+ "ɻ": [
+ 90
+ ],
+ "ɽ": [
+ 91
+ ],
+ "ɾ": [
+ 92
+ ],
+ "ʀ": [
+ 93
+ ],
+ "ʁ": [
+ 94
+ ],
+ "ʂ": [
+ 95
+ ],
+ "ʃ": [
+ 96
+ ],
+ "ʄ": [
+ 97
+ ],
+ "ʈ": [
+ 98
+ ],
+ "ʉ": [
+ 99
+ ],
+ "ʊ": [
+ 100
+ ],
+ "ʋ": [
+ 101
+ ],
+ "ʌ": [
+ 102
+ ],
+ "ʍ": [
+ 103
+ ],
+ "ʎ": [
+ 104
+ ],
+ "ʏ": [
+ 105
+ ],
+ "ʐ": [
+ 106
+ ],
+ "ʑ": [
+ 107
+ ],
+ "ʒ": [
+ 108
+ ],
+ "ʔ": [
+ 109
+ ],
+ "ʕ": [
+ 110
+ ],
+ "ʘ": [
+ 111
+ ],
+ "ʙ": [
+ 112
+ ],
+ "ʛ": [
+ 113
+ ],
+ "ʜ": [
+ 114
+ ],
+ "ʝ": [
+ 115
+ ],
+ "ʟ": [
+ 116
+ ],
+ "ʡ": [
+ 117
+ ],
+ "ʢ": [
+ 118
+ ],
+ "ʲ": [
+ 119
+ ],
+ "ˈ": [
+ 120
+ ],
+ "ˌ": [
+ 121
+ ],
+ "ː": [
+ 122
+ ],
+ "ˑ": [
+ 123
+ ],
+ "˞": [
+ 124
+ ],
+ "β": [
+ 125
+ ],
+ "θ": [
+ 126
+ ],
+ "χ": [
+ 127
+ ],
+ "ᵻ": [
+ 128
+ ],
+ "ⱱ": [
+ 129
+ ],
+ "0": [
+ 130
+ ],
+ "1": [
+ 131
+ ],
+ "2": [
+ 132
+ ],
+ "3": [
+ 133
+ ],
+ "4": [
+ 134
+ ],
+ "5": [
+ 135
+ ],
+ "6": [
+ 136
+ ],
+ "7": [
+ 137
+ ],
+ "8": [
+ 138
+ ],
+ "9": [
+ 139
+ ],
+ "̧": [
+ 140
+ ],
+ "̃": [
+ 141
+ ],
+ "̪": [
+ 142
+ ],
+ "̯": [
+ 143
+ ],
+ "̩": [
+ 144
+ ],
+ "ʰ": [
+ 145
+ ],
+ "ˤ": [
+ 146
+ ],
+ "ε": [
+ 147
+ ],
+ "↓": [
+ 148
+ ],
+ "#": [
+ 149
+ ],
+ "\"": [
+ 150
+ ]
+ },
+ "num_symbols": 256,
+ "num_speakers": 1,
+ "speaker_id_map": {},
+ "piper_version": "1.0.0",
+ "language": {
+ "code": "ru_RU",
+ "family": "ru",
+ "region": "RU",
+ "name_native": "Русский",
+ "name_english": "Russian",
+ "country_english": "Russia"
+ },
+ "dataset": "irina"
+} \ No newline at end of file
diff --git a/voice-to-text b/voice-to-text
new file mode 100755
index 0000000..6dadaac
--- /dev/null
+++ b/voice-to-text
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+#pip install vosk --break-system-packages
+#Download model for vosk on https://alphacephei.com/vosk/models
+#sudo pacman -S chatblade
+#pip install piper-tts --break-system-packages
+#Download model for piper on https://huggingface.co/rhasspy/piper-voices
+
+export OPENAI_API_KEY=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
+
+ffmpeg -y -f alsa -i default -acodec pcm_s16le -ac 1 -ar 44100 -t 4 -f wav ~/.cache/audio.wav >/dev/null 2>&1
+vosk-transcriber -m vosk-model-small-ru-0.22 -i ~/.cache/audio.wav -o ~/.cache/transcript.txt >/dev/null 2>&1
+chatblade -e $(cat ~/.cache/transcript.txt) > ~/.cache/response.txt
+cat ~/.cache/response.txt
+cat ~/.cache/response.txt | piper --model ru_RU-irina-medium.onnx --output-raw | aplay -r 22050 -f S16_LE -t raw -
diff --git a/vosk-model-small-ru-0.22/README b/vosk-model-small-ru-0.22/README
new file mode 100644
index 0000000..f5d35ea
--- /dev/null
+++ b/vosk-model-small-ru-0.22/README
@@ -0,0 +1,8 @@
+Small Russian model for Vosk (Android, RPi, other small devices)
+
+%WER 22.71 [ 9092 / 40042, 1124 ins, 1536 del, 6432 sub ] exp/chain_a/tdnn/decode_test_audiobooks_look_fast/wer_10_0.0
+%WER 11.79 [ 5940 / 50394, 894 ins, 832 del, 4214 sub ] exp/chain_a/tdnn/decode_test_golos_crowd_look_fast/wer_11_0.0
+%WER 21.34 [ 1789 / 8382, 173 ins, 440 del, 1176 sub ] exp/chain_a/tdnn/decode_test_golos_farfield_look_fast/wer_10_0.0
+%WER 29.89 [ 5579 / 18666, 476 ins, 1550 del, 3553 sub ] exp/chain_a/tdnn/decode_test_sova_devices_look_fast/wer_10_0.0
+%WER 31.97 [ 13588 / 42496, 1013 ins, 3640 del, 8935 sub ] exp/chain_a/tdnn/decode_test_youtube_look_fast/wer_9_0.0
+
diff --git a/vosk-model-small-ru-0.22/am/final.mdl b/vosk-model-small-ru-0.22/am/final.mdl
new file mode 100644
index 0000000..8978dba
--- /dev/null
+++ b/vosk-model-small-ru-0.22/am/final.mdl
Binary files differ
diff --git a/vosk-model-small-ru-0.22/conf/mfcc.conf b/vosk-model-small-ru-0.22/conf/mfcc.conf
new file mode 100644
index 0000000..eaa40c5
--- /dev/null
+++ b/vosk-model-small-ru-0.22/conf/mfcc.conf
@@ -0,0 +1,7 @@
+--sample-frequency=16000
+--use-energy=false
+--num-mel-bins=40
+--num-ceps=40
+--low-freq=20
+--high-freq=7600
+--allow-downsample=true
diff --git a/vosk-model-small-ru-0.22/conf/model.conf b/vosk-model-small-ru-0.22/conf/model.conf
new file mode 100644
index 0000000..64bc89e
--- /dev/null
+++ b/vosk-model-small-ru-0.22/conf/model.conf
@@ -0,0 +1,10 @@
+--min-active=200
+--max-active=3000
+--beam=10.0
+--lattice-beam=2.0
+--acoustic-scale=1.0
+--frame-subsampling-factor=3
+--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
+--endpoint.rule2.min-trailing-silence=0.5
+--endpoint.rule3.min-trailing-silence=1.0
+--endpoint.rule4.min-trailing-silence=2.0
diff --git a/vosk-model-small-ru-0.22/graph/Gr.fst b/vosk-model-small-ru-0.22/graph/Gr.fst
new file mode 100644
index 0000000..3952cc6
--- /dev/null
+++ b/vosk-model-small-ru-0.22/graph/Gr.fst
Binary files differ
diff --git a/vosk-model-small-ru-0.22/graph/HCLr.fst b/vosk-model-small-ru-0.22/graph/HCLr.fst
new file mode 100644
index 0000000..373f12f
--- /dev/null
+++ b/vosk-model-small-ru-0.22/graph/HCLr.fst
Binary files differ
diff --git a/vosk-model-small-ru-0.22/graph/disambig_tid.int b/vosk-model-small-ru-0.22/graph/disambig_tid.int
new file mode 100644
index 0000000..8294520
--- /dev/null
+++ b/vosk-model-small-ru-0.22/graph/disambig_tid.int
@@ -0,0 +1,5 @@
+9855
+9856
+9857
+9858
+9859
diff --git a/vosk-model-small-ru-0.22/graph/phones/word_boundary.int b/vosk-model-small-ru-0.22/graph/phones/word_boundary.int
new file mode 100644
index 0000000..f4a3008
--- /dev/null
+++ b/vosk-model-small-ru-0.22/graph/phones/word_boundary.int
@@ -0,0 +1,202 @@
+1 nonword
+2 begin
+3 end
+4 internal
+5 singleton
+6 nonword
+7 begin
+8 end
+9 internal
+10 singleton
+11 begin
+12 end
+13 internal
+14 singleton
+15 begin
+16 end
+17 internal
+18 singleton
+19 begin
+20 end
+21 internal
+22 singleton
+23 begin
+24 end
+25 internal
+26 singleton
+27 begin
+28 end
+29 internal
+30 singleton
+31 begin
+32 end
+33 internal
+34 singleton
+35 begin
+36 end
+37 internal
+38 singleton
+39 begin
+40 end
+41 internal
+42 singleton
+43 begin
+44 end
+45 internal
+46 singleton
+47 begin
+48 end
+49 internal
+50 singleton
+51 begin
+52 end
+53 internal
+54 singleton
+55 begin
+56 end
+57 internal
+58 singleton
+59 begin
+60 end
+61 internal
+62 singleton
+63 begin
+64 end
+65 internal
+66 singleton
+67 begin
+68 end
+69 internal
+70 singleton
+71 begin
+72 end
+73 internal
+74 singleton
+75 begin
+76 end
+77 internal
+78 singleton
+79 begin
+80 end
+81 internal
+82 singleton
+83 begin
+84 end
+85 internal
+86 singleton
+87 begin
+88 end
+89 internal
+90 singleton
+91 begin
+92 end
+93 internal
+94 singleton
+95 begin
+96 end
+97 internal
+98 singleton
+99 begin
+100 end
+101 internal
+102 singleton
+103 begin
+104 end
+105 internal
+106 singleton
+107 begin
+108 end
+109 internal
+110 singleton
+111 begin
+112 end
+113 internal
+114 singleton
+115 begin
+116 end
+117 internal
+118 singleton
+119 begin
+120 end
+121 internal
+122 singleton
+123 begin
+124 end
+125 internal
+126 singleton
+127 begin
+128 end
+129 internal
+130 singleton
+131 begin
+132 end
+133 internal
+134 singleton
+135 begin
+136 end
+137 internal
+138 singleton
+139 begin
+140 end
+141 internal
+142 singleton
+143 begin
+144 end
+145 internal
+146 singleton
+147 begin
+148 end
+149 internal
+150 singleton
+151 begin
+152 end
+153 internal
+154 singleton
+155 begin
+156 end
+157 internal
+158 singleton
+159 begin
+160 end
+161 internal
+162 singleton
+163 begin
+164 end
+165 internal
+166 singleton
+167 begin
+168 end
+169 internal
+170 singleton
+171 begin
+172 end
+173 internal
+174 singleton
+175 begin
+176 end
+177 internal
+178 singleton
+179 begin
+180 end
+181 internal
+182 singleton
+183 begin
+184 end
+185 internal
+186 singleton
+187 begin
+188 end
+189 internal
+190 singleton
+191 begin
+192 end
+193 internal
+194 singleton
+195 begin
+196 end
+197 internal
+198 singleton
+199 begin
+200 end
+201 internal
+202 singleton
diff --git a/vosk-model-small-ru-0.22/ivector/final.dubm b/vosk-model-small-ru-0.22/ivector/final.dubm
new file mode 100644
index 0000000..4166b20
--- /dev/null
+++ b/vosk-model-small-ru-0.22/ivector/final.dubm
Binary files differ
diff --git a/vosk-model-small-ru-0.22/ivector/final.ie b/vosk-model-small-ru-0.22/ivector/final.ie
new file mode 100644
index 0000000..f256f7a
--- /dev/null
+++ b/vosk-model-small-ru-0.22/ivector/final.ie
Binary files differ
diff --git a/vosk-model-small-ru-0.22/ivector/final.mat b/vosk-model-small-ru-0.22/ivector/final.mat
new file mode 100644
index 0000000..a6dbb02
--- /dev/null
+++ b/vosk-model-small-ru-0.22/ivector/final.mat
Binary files differ
diff --git a/vosk-model-small-ru-0.22/ivector/global_cmvn.stats b/vosk-model-small-ru-0.22/ivector/global_cmvn.stats
new file mode 100644
index 0000000..4be171c
--- /dev/null
+++ b/vosk-model-small-ru-0.22/ivector/global_cmvn.stats
@@ -0,0 +1,3 @@
+ [
+ 8.330133e+10 -4.600894e+09 -2.394861e+09 2.127165e+09 -9.355799e+09 -9.378007e+09 -1.302309e+10 -9.460417e+09 -9.260028e+09 -4.58608e+09 -5.287111e+09 -1.972033e+09 -6.090821e+09 -1.336419e+09 -5.214569e+09 -2.321841e+09 -3.889789e+09 -1.060202e+09 -2.065653e+09 -2.684904e+08 -7.4007e+08 -4587485 -1.315853e+08 -8597548 2.599227e+08 7.408538e+07 5.505751e+08 -1.161846e+07 5.138103e+08 -1.828159e+08 4.251498e+08 -2.901496e+07 6.469246e+08 2.489644e+08 6.289868e+08 2.490337e+08 3.38884e+08 -1.788837e+08 -2.536016e+08 -1.591728e+08 8.388078e+08
+ 8.660994e+12 4.637783e+11 3.366465e+11 4.467952e+11 5.094759e+11 5.179353e+11 6.145244e+11 4.970492e+11 5.014889e+11 4.027981e+11 3.937422e+11 3.602942e+11 3.162307e+11 2.40687e+11 2.267307e+11 1.563018e+11 1.341105e+11 8.535779e+10 6.12398e+10 3.207774e+10 1.737325e+10 5.704115e+09 7.980573e+08 2.168777e+08 2.763352e+09 6.859176e+09 1.214891e+10 1.604714e+10 2.005353e+10 2.240119e+10 2.366007e+10 2.300222e+10 2.406182e+10 2.354406e+10 2.098983e+10 1.619869e+10 1.491578e+10 1.224871e+10 9.502735e+09 6.517532e+09 0 ]
diff --git a/vosk-model-small-ru-0.22/ivector/online_cmvn.conf b/vosk-model-small-ru-0.22/ivector/online_cmvn.conf
new file mode 100644
index 0000000..7748a4a
--- /dev/null
+++ b/vosk-model-small-ru-0.22/ivector/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/vosk-model-small-ru-0.22/ivector/splice.conf b/vosk-model-small-ru-0.22/ivector/splice.conf
new file mode 100644
index 0000000..960cd2e
--- /dev/null
+++ b/vosk-model-small-ru-0.22/ivector/splice.conf
@@ -0,0 +1,2 @@
+--left-context=3
+--right-context=3