From bc746ade99a0cbb6eb96a82416033f7fba407641 Mon Sep 17 00:00:00 2001 From: William P Date: Sun, 10 May 2026 15:42:21 -0400 Subject: [PATCH] listener: basic implementation of whisper --- .gitignore | 2 + listener/.gitignore | 27 ++++ listener/Cargo.lock | 287 ++++++++++++++++++++++++++++++++++++++++++ listener/Cargo.toml | 7 ++ listener/src/audio.rs | 21 ++++ listener/src/main.rs | 38 ++++++ 6 files changed, 382 insertions(+) create mode 100644 .gitignore create mode 100644 listener/.gitignore create mode 100644 listener/Cargo.lock create mode 100644 listener/Cargo.toml create mode 100644 listener/src/audio.rs create mode 100644 listener/src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..00eca70 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.devcontainer/ +scratch/ \ No newline at end of file diff --git a/listener/.gitignore b/listener/.gitignore new file mode 100644 index 0000000..13f04fa --- /dev/null +++ b/listener/.gitignore @@ -0,0 +1,27 @@ +# Generated by Cargo +# will have compiled files and executables +debug +target + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +# Generated by cargo mutants +# Contains mutation testing data +**/mutants.out*/ + +# rustc will dump stack traces when hitting an internal compiler error to PWD +rustc-ice-*.txt + +# RustRover +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Models dir +models/ \ No newline at end of file diff --git a/listener/Cargo.lock b/listener/Cargo.lock new file mode 100644 index 0000000..e2fce5d --- /dev/null +++ b/listener/Cargo.lock @@ -0,0 +1,287 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cc" +version = "1.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "scannerbot-listener" +version = "0.1.0" +dependencies = [ + "whisper-rs", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "whisper-rs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2088172d00f936c348d6a72f488dc2660ab3f507263a195df308a3c2383229f6" +dependencies = [ + "whisper-rs-sys", +] + +[[package]] +name = "whisper-rs-sys" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6986c0fe081241d391f09b9a071fbcbb59720c3563628c3c829057cf69f2a56f" +dependencies = [ + "bindgen", + "cfg-if", + "cmake", + "fs_extra", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" diff --git a/listener/Cargo.toml b/listener/Cargo.toml new file mode 100644 index 0000000..eb9de5d --- /dev/null +++ b/listener/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "scannerbot-listener" +version = "0.1.0" +edition = "2024" + +[dependencies] +whisper-rs = "0.16.0" diff --git a/listener/src/audio.rs b/listener/src/audio.rs new file mode 100644 index 0000000..19e3d61 --- /dev/null +++ b/listener/src/audio.rs @@ -0,0 +1,21 @@ +use std::process::{Command, Stdio}; + +pub fn decode(input: &str) -> Result, Box> { + let output = Command::new("ffmpeg") + .args([ + "-i", input, + "-ar", "16000", // resample 16kHz + "-ac", "1", // mono + "-f", "f32le", // use raw f32 little endian samples + "-" // stdout + ]) + .stderr(Stdio::null()) + .output()?; + + let audio_data = output.stdout + .chunks_exact(4) + .map(|b| f32::from_le_bytes(b.try_into().unwrap())) + .collect(); + + Ok(audio_data) +} \ No newline at end of file diff --git a/listener/src/main.rs b/listener/src/main.rs new file mode 100644 index 0000000..fcf06bc --- /dev/null +++ b/listener/src/main.rs @@ -0,0 +1,38 @@ +use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy}; + +mod audio; + +fn main() -> Result<(), Box> { + //println!("Hello, world!"); + + //let audio = audio::decode("../scratch/audio/gtav_police_sample.wav")?; + let audio = audio::decode(&std::env::args().nth(2).unwrap())?; + + let path_to_model = std::env::args().nth(1).unwrap(); + + let ctx = WhisperContext::new_with_params( + path_to_model, + WhisperContextParameters::default() + ).expect("failed to load model"); + + let params = FullParams::new(SamplingStrategy::BeamSearch { + beam_size: 5, patience: -1.0 + }); + + let mut state = ctx.create_state().expect("failed to create state"); + state + .full(params, &audio[..]) + .expect("failed to run model"); + + for segment in state.as_iter() { + println!( + "[{} - {}]: {}", + segment.start_timestamp(), + segment.end_timestamp(), + segment + ); + } + + Ok(()) + +}