listener: optimize transcription into 30s portions with timestamps

This commit is contained in:
2026-05-10 16:28:20 -04:00
parent bc746ade99
commit beccc7b50b
5 changed files with 427 additions and 41 deletions
+3
View File
@@ -25,3 +25,6 @@ rustc-ice-*.txt
# Models dir
models/
# Transcription text file
transcription.txt
+307
View File
@@ -11,6 +11,21 @@ dependencies = [
"memchr",
]
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bindgen"
version = "0.72.1"
@@ -37,6 +52,21 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
[[package]]
name = "block2"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5"
dependencies = [
"objc2",
]
[[package]]
name = "bumpalo"
version = "3.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "cc"
version = "1.2.62"
@@ -62,6 +92,25 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
@@ -82,6 +131,35 @@ dependencies = [
"cc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "ctrlc"
version = "3.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0b1fab2ae45819af2d0731d60f2afe17227ebb1a1538a236da84c93e9a60162"
dependencies = [
"dispatch2",
"nix",
"windows-sys",
]
[[package]]
name = "dispatch2"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
dependencies = [
"bitflags",
"block2",
"libc",
"objc2",
]
[[package]]
name = "either"
version = "1.15.0"
@@ -100,12 +178,60 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "futures-core"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
[[package]]
name = "futures-task"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
[[package]]
name = "futures-util"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
dependencies = [
"futures-core",
"futures-task",
"pin-project-lite",
"slab",
]
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "iana-time-zone"
version = "0.1.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "itertools"
version = "0.13.0"
@@ -115,6 +241,18 @@ dependencies = [
"either",
]
[[package]]
name = "js-sys"
version = "0.3.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08"
dependencies = [
"cfg-if",
"futures-util",
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.186"
@@ -149,6 +287,18 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nix"
version = "0.31.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3"
dependencies = [
"bitflags",
"cfg-if",
"cfg_aliases",
"libc",
]
[[package]]
name = "nom"
version = "7.1.3"
@@ -159,6 +309,42 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "objc2"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f"
dependencies = [
"objc2-encode",
]
[[package]]
name = "objc2-encode"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
[[package]]
name = "once_cell"
version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "pin-project-lite"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "prettyplease"
version = "0.2.37"
@@ -222,10 +408,18 @@ version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "scannerbot-listener"
version = "0.1.0"
dependencies = [
"chrono",
"ctrlc",
"whisper-rs",
]
@@ -241,6 +435,12 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "slab"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
[[package]]
name = "syn"
version = "2.0.117"
@@ -258,6 +458,51 @@ version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "wasm-bindgen"
version = "0.2.121"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.121"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.121"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.121"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441"
dependencies = [
"unicode-ident",
]
[[package]]
name = "whisper-rs"
version = "0.16.0"
@@ -280,8 +525,70 @@ dependencies = [
"semver",
]
[[package]]
name = "windows-core"
version = "0.62.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-result"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
+2
View File
@@ -4,4 +4,6 @@ version = "0.1.0"
edition = "2024"
[dependencies]
chrono = "0.4.44"
ctrlc = "3.5.2"
whisper-rs = "0.16.0"
+45 -8
View File
@@ -1,14 +1,9 @@
use std::process::{Command, Stdio};
use std::io::Read;
use std::process::{Child, ChildStdout, Command, Stdio};
pub fn decode(input: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
let output = Command::new("ffmpeg")
.args([
"-i", input,
"-ar", "16000", // resample 16kHz
"-ac", "1", // mono
"-f", "f32le", // use raw f32 little endian samples
"-" // stdout
])
.args(["-i", input, "-ar", "16000", "-ac", "1", "-f", "f32le", "-"])
.stderr(Stdio::null())
.output()?;
@@ -19,3 +14,45 @@ pub fn decode(input: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
Ok(audio_data)
}
pub struct LiveStream {
child: Child,
stdout: ChildStdout,
}
impl LiveStream {
pub fn open(source: &str) -> Result<Self, Box<dyn std::error::Error>> {
let mut child = Command::new("ffmpeg")
.args(["-f", "pulse", "-i", source, "-ar", "16000", "-ac", "1", "-f", "f32le", "pipe:1"])
.env("PULSE_PROP_application.name", "scannerbot-listener")
.stderr(Stdio::null())
.stdout(Stdio::piped())
.spawn()?;
let stdout = child.stdout.take().unwrap();
Ok(LiveStream { child, stdout })
}
// Reads exactly `secs` seconds of audio. Returns None when the stream ends.
pub fn next_chunk(&mut self, secs: u32) -> Result<Option<Vec<f32>>, Box<dyn std::error::Error>> {
let num_bytes = secs as usize * 16000 * 4;
let mut buf = vec![0u8; num_bytes];
match self.stdout.read_exact(&mut buf) {
Ok(()) => {
let samples = buf.chunks_exact(4)
.map(|b| f32::from_le_bytes(b.try_into().unwrap()))
.collect();
Ok(Some(samples))
}
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => Ok(None),
Err(e) => Err(e.into()),
}
}
}
impl Drop for LiveStream {
fn drop(&mut self) {
let _ = self.child.kill();
}
}
+64 -27
View File
@@ -1,38 +1,75 @@
use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy};
use std::fs::OpenOptions;
use std::io::Write;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use chrono::{DateTime, Local};
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
mod audio;
fn main() -> Result<(), Box<dyn std::error::Error>> {
//println!("Hello, world!");
//let audio = audio::decode("../scratch/audio/gtav_police_sample.wav")?;
let audio = audio::decode(&std::env::args().nth(2).unwrap())?;
let path_to_model = std::env::args().nth(1).unwrap();
let ctx = WhisperContext::new_with_params(
path_to_model,
WhisperContextParameters::default()
).expect("failed to load model");
const CHUNK_SECS: u32 = 30;
const CHUNK_SAMPLES: usize = 16000 * CHUNK_SECS as usize;
fn transcribe_chunk(
state: &mut whisper_rs::WhisperState,
chunk: &[f32],
out: &mut impl Write,
) -> Result<(), Box<dyn std::error::Error>> {
let params = FullParams::new(SamplingStrategy::BeamSearch {
beam_size: 5, patience: -1.0
beam_size: 5,
patience: -1.0,
});
let mut state = ctx.create_state().expect("failed to create state");
state
.full(params, &audio[..])
.expect("failed to run model");
let time: DateTime<Local> = Local::now();
out.write_all(format!("[{}]: ", time.to_string()).as_bytes())?;
state.full(params, chunk)?;
for segment in state.as_iter() {
println!(
"[{} - {}]: {}",
segment.start_timestamp(),
segment.end_timestamp(),
segment
);
let line = format!("{}\n", segment);
print!("{}", line);
out.write_all(line.as_bytes())?;
}
out.flush()?;
Ok(())
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let model_path = std::env::args().nth(1).expect("usage: listener <model> <file.wav|pulse:SOURCE> [output.txt]");
let audio_arg = std::env::args().nth(2).expect("usage: listener <model> <file.wav|pulse:SOURCE> [output.txt]");
let output_path = std::env::args().nth(3).unwrap_or_else(|| "transcription.txt".to_string());
let running = Arc::new(AtomicBool::new(true));
let r = running.clone();
ctrlc::set_handler(move || {
r.store(false, Ordering::SeqCst);
})?;
let ctx = WhisperContext::new_with_params(&model_path, WhisperContextParameters::default())
.expect("failed to load model");
let mut state = ctx.create_state().expect("failed to create state");
let mut out = OpenOptions::new().create(true).append(true).open(&output_path)?;
eprintln!("Transcribing {}{} (Ctrl+C to stop)", audio_arg, output_path);
if let Some(source) = audio_arg.strip_prefix("pulse:") {
let mut stream = audio::LiveStream::open(source)?;
while running.load(Ordering::SeqCst) {
match stream.next_chunk(CHUNK_SECS)? {
Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out)?,
None => break,
}
}
} else {
while running.load(Ordering::SeqCst) {
let audio = audio::decode(&audio_arg)?;
for chunk in audio.chunks(CHUNK_SAMPLES) {
if !running.load(Ordering::SeqCst) {
break;
}
transcribe_chunk(&mut state, chunk, &mut out)?;
}
}
}
eprintln!("Stopped.");
Ok(())
}