-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
Copy pathdownload-ggml-model.sh
executable file
·149 lines (124 loc) · 3.67 KB
/
download-ggml-model.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/bin/sh
# This script downloads Whisper model files that have already been converted to ggml format.
# This way you don't have to convert them yourself.
#src="https://ggml.ggerganov.com"
#pfx="ggml-model-whisper"
src="https://huggingface.co/ggerganov/whisper.cpp"
pfx="resolve/main/ggml"
BOLD="\033[1m"
RESET='\033[0m'
# get the path of this script
get_script_path() {
if [ -x "$(command -v realpath)" ]; then
dirname "$(realpath "$0")"
else
_ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
echo "$_ret"
fi
}
script_path="$(get_script_path)"
# Check if the script is inside a /bin/ directory
case "$script_path" in
*/bin) default_download_path="$PWD" ;; # Use current directory as default download path if in /bin/
*) default_download_path="$script_path" ;; # Otherwise, use script directory
esac
models_path="${2:-$default_download_path}"
# Whisper models
models="tiny
tiny.en
tiny-q5_1
tiny.en-q5_1
tiny-q8_0
base
base.en
base-q5_1
base.en-q5_1
base-q8_0
small
small.en
small.en-tdrz
small-q5_1
small.en-q5_1
small-q8_0
medium
medium.en
medium-q5_0
medium.en-q5_0
medium-q8_0
large-v1
large-v2
large-v2-q5_0
large-v2-q8_0
large-v3
large-v3-q5_0
large-v3-turbo
large-v3-turbo-q5_0
large-v3-turbo-q8_0"
# list available models
list_models() {
printf "\n"
printf "Available models:"
model_class=""
for model in $models; do
this_model_class="${model%%[.-]*}"
if [ "$this_model_class" != "$model_class" ]; then
printf "\n "
model_class=$this_model_class
fi
printf " %s" "$model"
done
printf "\n\n"
}
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <model> [models_path]\n" "$0"
list_models
printf "___________________________________________________________\n"
printf "${BOLD}.en${RESET} = english-only ${BOLD}-q5_[01]${RESET} = quantized ${BOLD}-tdrz${RESET} = tinydiarize\n"
exit 1
fi
model=$1
if ! echo "$models" | grep -q -w "$model"; then
printf "Invalid model: %s\n" "$model"
list_models
exit 1
fi
# check if model contains `tdrz` and update the src and pfx accordingly
if echo "$model" | grep -q "tdrz"; then
src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
pfx="resolve/main/ggml"
fi
echo "$model" | grep -q '^"tdrz"*$'
# download ggml model
printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"
cd "$models_path" || exit
if [ -f "ggml-$model.bin" ]; then
printf "Model %s already exists. Skipping download.\n" "$model"
exit 0
fi
if [ -x "$(command -v wget2)" ]; then
wget2 --no-config --progress bar -O ggml-"$model".bin $src/$pfx-"$model".bin
elif [ -x "$(command -v wget)" ]; then
wget --no-config --quiet --show-progress -O ggml-"$model".bin $src/$pfx-"$model".bin
elif [ -x "$(command -v curl)" ]; then
curl -L --output ggml-"$model".bin $src/$pfx-"$model".bin
else
printf "Either wget or curl is required to download models.\n"
exit 1
fi
if [ $? -ne 0 ]; then
printf "Failed to download ggml model %s \n" "$model"
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
exit 1
fi
# Check if 'whisper-cli' is available in the system PATH
if command -v whisper-cli >/dev/null 2>&1; then
# If found, use 'whisper-cli' (relying on PATH resolution)
whisper_cmd="whisper-cli"
else
# If not found, use the local build version
whisper_cmd="./build/bin/whisper-cli"
fi
printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
printf "You can now use it like this:\n\n"
printf " $ %s -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$whisper_cmd" "$models_path" "$model"
printf "\n"