listener: save each transcribed chunk as a WAV clip

Summary of the changes in this patch:
  * .gitignore: ignore the audio_clips/ directory.
  * Cargo.toml, Cargo.lock: add the hound 3.5.1 crate.
  * src/main.rs: add CLIP_DIR ("./audio_clips"); main() creates it with
    fs::create_dir_all before transcription starts.
  * src/main.rs: add save_clip(), which writes a sample slice as a mono,
    16000 Hz, 16-bit integer WAV file, clamping each sample to [-1.0, 1.0]
    and scaling it by i16::MAX.
  * src/main.rs: transcribe_chunk() takes a new `counter: &mut u32`. It builds
    an id from the local timestamp (%Y%m%d_%H%M%S) plus the zero-padded
    counter, increments the counter, saves the chunk to CLIP_DIR/<id>.wav
    before running the model, and prefixes the transcript entry with
    "[time] [id]: ".
  * src/main.rs: main() starts the counter at 0 and passes it to both
    transcribe_chunk() call sites.

NOTE(review): this patch was rebuilt from a copy in which every line break and
all leading indentation had been removed and text between angle brackets had
been lost. Line breaks are restored from the hunk line counts. Indentation is
restored by convention (4 spaces for Rust, 1 space for Cargo.lock list entries)
and is a guess for context lines, in particular the nesting depth in the last
main.rs hunk. `Box<dyn std::error::Error>` and `DateTime<Local>` are inferred
restorations. The usage string on the audio_arg context line may also have lost
`<...>` placeholders. Confirm against the repository, or apply with
`git apply --ignore-whitespace`.

diff --git a/listener/.gitignore b/listener/.gitignore
index 672c96f..6576326 100644
--- a/listener/.gitignore
+++ b/listener/.gitignore
@@ -27,4 +27,7 @@ rustc-ice-*.txt
 models/
 
 # Transcription text file
-transcription.txt
\ No newline at end of file
+transcription.txt
+
+# Clips dir
+audio_clips/
\ No newline at end of file
diff --git a/listener/Cargo.lock b/listener/Cargo.lock
index ab5f04c..0e2304e 100644
--- a/listener/Cargo.lock
+++ b/listener/Cargo.lock
@@ -208,6 +208,12 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
 
+[[package]]
+name = "hound"
+version = "3.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.65"
@@ -420,6 +426,7 @@ version = "0.1.0"
 dependencies = [
  "chrono",
  "ctrlc",
+ "hound",
  "whisper-rs",
 ]
 
diff --git a/listener/Cargo.toml b/listener/Cargo.toml
index b251b23..98e03b2 100644
--- a/listener/Cargo.toml
+++ b/listener/Cargo.toml
@@ -6,4 +6,5 @@ edition = "2024"
 [dependencies]
 chrono = "0.4.44"
 ctrlc = "3.5.2"
+hound = "3.5.1"
 whisper-rs = "0.16.0"
diff --git a/listener/src/main.rs b/listener/src/main.rs
index c146da2..4ea4105 100644
--- a/listener/src/main.rs
+++ b/listener/src/main.rs
@@ -1,4 +1,4 @@
-use std::fs::OpenOptions;
+use std::fs::{self, OpenOptions};
 use std::io::Write;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
@@ -9,18 +9,41 @@ mod audio;
 
 const CHUNK_SECS: u32 = 30;
 const CHUNK_SAMPLES: usize = 16000 * CHUNK_SECS as usize;
+const CLIP_DIR: &str = "./audio_clips";
+
+fn save_clip(samples: &[f32], path: &str) -> Result<(), Box<dyn std::error::Error>> {
+    let spec = hound::WavSpec {
+        channels: 1,
+        sample_rate: 16000,
+        bits_per_sample: 16,
+        sample_format: hound::SampleFormat::Int,
+    };
+    let mut writer = hound::WavWriter::create(path, spec)?;
+    for &s in samples {
+        writer.write_sample((s.clamp(-1.0, 1.0) * i16::MAX as f32) as i16)?;
+    }
+    writer.finalize()?;
+    Ok(())
+}
 
 fn transcribe_chunk(
     state: &mut whisper_rs::WhisperState,
     chunk: &[f32],
     out: &mut impl Write,
+    counter: &mut u32,
 ) -> Result<(), Box<dyn std::error::Error>> {
+    let time: DateTime<Local> = Local::now();
+    let id = format!("{}_{:04}", time.format("%Y%m%d_%H%M%S"), counter);
+    *counter += 1;
+
+    let clip_path = format!("{}/{}.wav", CLIP_DIR, id);
+    save_clip(chunk, &clip_path)?;
+
     let params = FullParams::new(SamplingStrategy::BeamSearch {
         beam_size: 5,
         patience: -1.0,
     });
-    let time: DateTime<Local> = Local::now();
-    out.write_all(format!("[{}]: ", time.to_string()).as_bytes())?;
+    out.write_all(format!("[{}] [{}]: ", time, id).as_bytes())?;
     state.full(params, chunk)?;
     for segment in state.as_iter() {
         let line = format!("{}\n", segment);
@@ -36,6 +59,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let audio_arg = std::env::args().nth(2).expect("usage: listener [output.txt]");
     let output_path = std::env::args().nth(3).unwrap_or_else(|| "transcription.txt".to_string());
 
+    fs::create_dir_all(CLIP_DIR)?;
+
     let running = Arc::new(AtomicBool::new(true));
     let r = running.clone();
     ctrlc::set_handler(move || {
@@ -47,14 +72,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let mut state = ctx.create_state().expect("failed to create state");
 
     let mut out = OpenOptions::new().create(true).append(true).open(&output_path)?;
+    let mut counter: u32 = 0;
 
-    eprintln!("Transcribing {} → {} (Ctrl+C to stop)", audio_arg, output_path);
+    eprintln!("Transcribing {} → {} (clips → {}) (Ctrl+C to stop)", audio_arg, output_path, CLIP_DIR);
 
     if let Some(source) = audio_arg.strip_prefix("pulse:") {
         let mut stream = audio::LiveStream::open(source)?;
         while running.load(Ordering::SeqCst) {
             match stream.next_chunk(CHUNK_SECS)? {
-                Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out)?,
+                Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out, &mut counter)?,
                 None => break,
             }
         }
@@ -65,7 +91,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             if !running.load(Ordering::SeqCst) {
                 break;
             }
-            transcribe_chunk(&mut state, chunk, &mut out)?;
+            transcribe_chunk(&mut state, chunk, &mut out, &mut counter)?;
         }
     }
 }