diff options
17 files changed, 736 insertions, 0 deletions
diff --git a/ru_RU-irina-medium.onnx b/ru_RU-irina-medium.onnx Binary files differnew file mode 100644 index 0000000..bff2b02 --- /dev/null +++ b/ru_RU-irina-medium.onnx diff --git a/ru_RU-irina-medium.onnx.json b/ru_RU-irina-medium.onnx.json new file mode 100644 index 0000000..7b61766 --- /dev/null +++ b/ru_RU-irina-medium.onnx.json @@ -0,0 +1,483 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "ru" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + 
"ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "dataset": "irina" +}
\ No newline at end of file
diff --git a/voice-to-text b/voice-to-text
new file mode 100755
index 0000000..6dadaac
--- /dev/null
+++ b/voice-to-text
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Voice assistant: record speech, transcribe it with Vosk, send the text to chatblade, then print the reply and speak it with Piper.
+# Setup: pip install vosk piper-tts --break-system-packages; sudo pacman -S chatblade
+# Models: https://alphacephei.com/vosk/models (Vosk), https://huggingface.co/rhasspy/piper-voices (Piper)
+# The API key must come from the caller's environment and never be committed; any key previously stored in this file must be revoked.
+: "${OPENAI_API_KEY:?set OPENAI_API_KEY in the environment before running this script}"
+export OPENAI_API_KEY
+# Capture 4 seconds of mono 16-bit PCM at 44100 Hz from the default ALSA device, overwriting any previous recording.
+ffmpeg -y -f alsa -i default -acodec pcm_s16le -ac 1 -ar 44100 -t 4 -f wav ~/.cache/audio.wav >/dev/null 2>&1
+# Transcribe the recording with the small Russian Vosk model found in the current directory.
+vosk-transcriber -m vosk-model-small-ru-0.22 -i ~/.cache/audio.wav -o ~/.cache/transcript.txt >/dev/null 2>&1
+# Quote the substitution so the transcript is passed as a single argument, with no word splitting or glob expansion.
+chatblade -e "$(cat ~/.cache/transcript.txt)" > ~/.cache/response.txt
+cat ~/.cache/response.txt
+piper --model ru_RU-irina-medium.onnx --output-raw < ~/.cache/response.txt | aplay -r 22050 -f S16_LE -t raw -
diff --git a/vosk-model-small-ru-0.22/README b/vosk-model-small-ru-0.22/README
new file mode 100644
index 0000000..f5d35ea
--- /dev/null
+++ b/vosk-model-small-ru-0.22/README
@@ -0,0 +1,8 @@
+Small Russian model for Vosk (Android, RPi, other small devices)
+
+%WER 22.71 [ 9092 / 40042, 1124 ins, 1536 del, 6432 sub ] exp/chain_a/tdnn/decode_test_audiobooks_look_fast/wer_10_0.0
+%WER 11.79 [ 5940 / 50394, 894 ins, 832 del, 4214 sub ] exp/chain_a/tdnn/decode_test_golos_crowd_look_fast/wer_11_0.0
+%WER 21.34 [ 1789 / 8382, 173 ins, 440 del, 1176 sub ] exp/chain_a/tdnn/decode_test_golos_farfield_look_fast/wer_10_0.0
+%WER 29.89 [ 5579 / 18666, 476 ins, 1550 del, 3553 sub ] exp/chain_a/tdnn/decode_test_sova_devices_look_fast/wer_10_0.0
+%WER 31.97 [ 13588 / 42496, 1013 ins, 3640 del, 8935 sub ] exp/chain_a/tdnn/decode_test_youtube_look_fast/wer_9_0.0
+
diff --git a/vosk-model-small-ru-0.22/am/final.mdl b/vosk-model-small-ru-0.22/am/final.mdl
Binary files differnew file mode 100644
index 0000000..8978dba
--- /dev/null
+++
b/vosk-model-small-ru-0.22/am/final.mdl diff --git a/vosk-model-small-ru-0.22/conf/mfcc.conf b/vosk-model-small-ru-0.22/conf/mfcc.conf new file mode 100644 index 0000000..eaa40c5 --- /dev/null +++ b/vosk-model-small-ru-0.22/conf/mfcc.conf @@ -0,0 +1,7 @@ +--sample-frequency=16000 +--use-energy=false +--num-mel-bins=40 +--num-ceps=40 +--low-freq=20 +--high-freq=7600 +--allow-downsample=true diff --git a/vosk-model-small-ru-0.22/conf/model.conf b/vosk-model-small-ru-0.22/conf/model.conf new file mode 100644 index 0000000..64bc89e --- /dev/null +++ b/vosk-model-small-ru-0.22/conf/model.conf @@ -0,0 +1,10 @@ +--min-active=200 +--max-active=3000 +--beam=10.0 +--lattice-beam=2.0 +--acoustic-scale=1.0 +--frame-subsampling-factor=3 +--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10 +--endpoint.rule2.min-trailing-silence=0.5 +--endpoint.rule3.min-trailing-silence=1.0 +--endpoint.rule4.min-trailing-silence=2.0 diff --git a/vosk-model-small-ru-0.22/graph/Gr.fst b/vosk-model-small-ru-0.22/graph/Gr.fst Binary files differnew file mode 100644 index 0000000..3952cc6 --- /dev/null +++ b/vosk-model-small-ru-0.22/graph/Gr.fst diff --git a/vosk-model-small-ru-0.22/graph/HCLr.fst b/vosk-model-small-ru-0.22/graph/HCLr.fst Binary files differnew file mode 100644 index 0000000..373f12f --- /dev/null +++ b/vosk-model-small-ru-0.22/graph/HCLr.fst diff --git a/vosk-model-small-ru-0.22/graph/disambig_tid.int b/vosk-model-small-ru-0.22/graph/disambig_tid.int new file mode 100644 index 0000000..8294520 --- /dev/null +++ b/vosk-model-small-ru-0.22/graph/disambig_tid.int @@ -0,0 +1,5 @@ +9855 +9856 +9857 +9858 +9859 diff --git a/vosk-model-small-ru-0.22/graph/phones/word_boundary.int b/vosk-model-small-ru-0.22/graph/phones/word_boundary.int new file mode 100644 index 0000000..f4a3008 --- /dev/null +++ b/vosk-model-small-ru-0.22/graph/phones/word_boundary.int @@ -0,0 +1,202 @@ +1 nonword +2 begin +3 end +4 internal +5 singleton +6 nonword +7 begin +8 end +9 internal +10 singleton +11 begin +12 end 
+13 internal +14 singleton +15 begin +16 end +17 internal +18 singleton +19 begin +20 end +21 internal +22 singleton +23 begin +24 end +25 internal +26 singleton +27 begin +28 end +29 internal +30 singleton +31 begin +32 end +33 internal +34 singleton +35 begin +36 end +37 internal +38 singleton +39 begin +40 end +41 internal +42 singleton +43 begin +44 end +45 internal +46 singleton +47 begin +48 end +49 internal +50 singleton +51 begin +52 end +53 internal +54 singleton +55 begin +56 end +57 internal +58 singleton +59 begin +60 end +61 internal +62 singleton +63 begin +64 end +65 internal +66 singleton +67 begin +68 end +69 internal +70 singleton +71 begin +72 end +73 internal +74 singleton +75 begin +76 end +77 internal +78 singleton +79 begin +80 end +81 internal +82 singleton +83 begin +84 end +85 internal +86 singleton +87 begin +88 end +89 internal +90 singleton +91 begin +92 end +93 internal +94 singleton +95 begin +96 end +97 internal +98 singleton +99 begin +100 end +101 internal +102 singleton +103 begin +104 end +105 internal +106 singleton +107 begin +108 end +109 internal +110 singleton +111 begin +112 end +113 internal +114 singleton +115 begin +116 end +117 internal +118 singleton +119 begin +120 end +121 internal +122 singleton +123 begin +124 end +125 internal +126 singleton +127 begin +128 end +129 internal +130 singleton +131 begin +132 end +133 internal +134 singleton +135 begin +136 end +137 internal +138 singleton +139 begin +140 end +141 internal +142 singleton +143 begin +144 end +145 internal +146 singleton +147 begin +148 end +149 internal +150 singleton +151 begin +152 end +153 internal +154 singleton +155 begin +156 end +157 internal +158 singleton +159 begin +160 end +161 internal +162 singleton +163 begin +164 end +165 internal +166 singleton +167 begin +168 end +169 internal +170 singleton +171 begin +172 end +173 internal +174 singleton +175 begin +176 end +177 internal +178 singleton +179 begin +180 end +181 internal +182 singleton 
+183 begin +184 end +185 internal +186 singleton +187 begin +188 end +189 internal +190 singleton +191 begin +192 end +193 internal +194 singleton +195 begin +196 end +197 internal +198 singleton +199 begin +200 end +201 internal +202 singleton diff --git a/vosk-model-small-ru-0.22/ivector/final.dubm b/vosk-model-small-ru-0.22/ivector/final.dubm Binary files differnew file mode 100644 index 0000000..4166b20 --- /dev/null +++ b/vosk-model-small-ru-0.22/ivector/final.dubm diff --git a/vosk-model-small-ru-0.22/ivector/final.ie b/vosk-model-small-ru-0.22/ivector/final.ie Binary files differnew file mode 100644 index 0000000..f256f7a --- /dev/null +++ b/vosk-model-small-ru-0.22/ivector/final.ie diff --git a/vosk-model-small-ru-0.22/ivector/final.mat b/vosk-model-small-ru-0.22/ivector/final.mat Binary files differnew file mode 100644 index 0000000..a6dbb02 --- /dev/null +++ b/vosk-model-small-ru-0.22/ivector/final.mat diff --git a/vosk-model-small-ru-0.22/ivector/global_cmvn.stats b/vosk-model-small-ru-0.22/ivector/global_cmvn.stats new file mode 100644 index 0000000..4be171c --- /dev/null +++ b/vosk-model-small-ru-0.22/ivector/global_cmvn.stats @@ -0,0 +1,3 @@ + [ + 8.330133e+10 -4.600894e+09 -2.394861e+09 2.127165e+09 -9.355799e+09 -9.378007e+09 -1.302309e+10 -9.460417e+09 -9.260028e+09 -4.58608e+09 -5.287111e+09 -1.972033e+09 -6.090821e+09 -1.336419e+09 -5.214569e+09 -2.321841e+09 -3.889789e+09 -1.060202e+09 -2.065653e+09 -2.684904e+08 -7.4007e+08 -4587485 -1.315853e+08 -8597548 2.599227e+08 7.408538e+07 5.505751e+08 -1.161846e+07 5.138103e+08 -1.828159e+08 4.251498e+08 -2.901496e+07 6.469246e+08 2.489644e+08 6.289868e+08 2.490337e+08 3.38884e+08 -1.788837e+08 -2.536016e+08 -1.591728e+08 8.388078e+08 + 8.660994e+12 4.637783e+11 3.366465e+11 4.467952e+11 5.094759e+11 5.179353e+11 6.145244e+11 4.970492e+11 5.014889e+11 4.027981e+11 3.937422e+11 3.602942e+11 3.162307e+11 2.40687e+11 2.267307e+11 1.563018e+11 1.341105e+11 8.535779e+10 6.12398e+10 3.207774e+10 1.737325e+10 
5.704115e+09 7.980573e+08 2.168777e+08 2.763352e+09 6.859176e+09 1.214891e+10 1.604714e+10 2.005353e+10 2.240119e+10 2.366007e+10 2.300222e+10 2.406182e+10 2.354406e+10 2.098983e+10 1.619869e+10 1.491578e+10 1.224871e+10 9.502735e+09 6.517532e+09 0 ] diff --git a/vosk-model-small-ru-0.22/ivector/online_cmvn.conf b/vosk-model-small-ru-0.22/ivector/online_cmvn.conf new file mode 100644 index 0000000..7748a4a --- /dev/null +++ b/vosk-model-small-ru-0.22/ivector/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/vosk-model-small-ru-0.22/ivector/splice.conf b/vosk-model-small-ru-0.22/ivector/splice.conf new file mode 100644 index 0000000..960cd2e --- /dev/null +++ b/vosk-model-small-ru-0.22/ivector/splice.conf @@ -0,0 +1,2 @@ +--left-context=3 +--right-context=3 |