listener: introduce clips feature

2026-05-10 19:02:54 -04:00
parent beccc7b50b
commit 37bc07f667
4 changed files with 44 additions and 7 deletions
@@ -28,3 +28,6 @@ models/
 # Transcription text file
 transcription.txt
 # Clips dir
 audio_clips/
@@ -208,6 +208,12 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
 [[package]]
 name = "hound"
 version = "3.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
 [[package]]
 name = "iana-time-zone"
 version = "0.1.65"
@@ -420,6 +426,7 @@ version = "0.1.0"
 dependencies = [
 "chrono",
 "ctrlc",
 "hound",
 "whisper-rs",
 ]
@@ -6,4 +6,5 @@ edition = "2024"
 [dependencies]
 chrono = "0.4.44"
 ctrlc = "3.5.2"
 hound = "3.5.1"
 whisper-rs = "0.16.0"
@@ -1,4 +1,4 @@
-use std::fs::OpenOptions;
+use std::fs::{self, OpenOptions};
 use std::io::Write;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
@@ -9,18 +9,41 @@ mod audio;
 // Length, in seconds, of each audio chunk requested from the live stream.
 const CHUNK_SECS: u32 = 30;
 // Number of samples in one chunk. The 16000 factor is presumably the sample
 // rate in Hz (it matches the WAV spec used by save_clip) — TODO confirm.
 const CHUNK_SAMPLES: usize = 16000 * CHUNK_SECS as usize;
 // Directory the per-chunk WAV clips are written to; main creates it at startup.
 const CLIP_DIR: &str = "./audio_clips";
 /// Writes `samples` to `path` as a single-channel, 16 kHz, 16-bit integer WAV file.
 ///
 /// Each `f32` sample is clamped to `[-1.0, 1.0]`, scaled by `i16::MAX`, and
 /// cast to `i16` before being written.
 ///
 /// # Errors
 ///
 /// Returns any error reported by `hound` while creating the file, writing a
 /// sample, or finalizing the writer.
 fn save_clip(samples: &[f32], path: &str) -> Result<(), Box<dyn std::error::Error>> {
    // Full-scale factor used to map the normalized float range onto i16.
    let full_scale = i16::MAX as f32;

    let mut wav = hound::WavWriter::create(
        path,
        hound::WavSpec {
            channels: 1,
            sample_rate: 16000,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        },
    )?;

    let pcm_samples = samples
        .iter()
        .map(|sample| (sample.clamp(-1.0, 1.0) * full_scale) as i16);
    for pcm in pcm_samples {
        wav.write_sample(pcm)?;
    }

    // Finalize explicitly so header/flush failures are returned to the caller.
    wav.finalize()?;
    Ok(())
 }
 fn transcribe_chunk(
    state: &mut whisper_rs::WhisperState,
    chunk: &[f32],
    out: &mut impl Write,
    counter: &mut u32,
 ) -> Result<(), Box<dyn std::error::Error>> {
    let time: DateTime<Local> = Local::now();
    let id = format!("{}_{:04}", time.format("%Y%m%d_%H%M%S"), counter);
    *counter += 1;
    let clip_path = format!("{}/{}.wav", CLIP_DIR, id);
    save_clip(chunk, &clip_path)?;
    let params = FullParams::new(SamplingStrategy::BeamSearch {
        beam_size: 5,
        patience: -1.0,
    });
-    let time: DateTime<Local> = Local::now();
+    out.write_all(format!("[{}] [{}]: ", time, id).as_bytes())?;
    out.write_all(format!("[{}]: ", time.to_string()).as_bytes())?;
    state.full(params, chunk)?;
    for segment in state.as_iter() {
        let line = format!("{}\n", segment);
@@ -36,6 +59,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
    let audio_arg  = std::env::args().nth(2).expect("usage: listener <model> <file.wav|pulse:SOURCE> [output.txt]");
    let output_path = std::env::args().nth(3).unwrap_or_else(|| "transcription.txt".to_string());
    fs::create_dir_all(CLIP_DIR)?;
    let running = Arc::new(AtomicBool::new(true));
    let r = running.clone();
    ctrlc::set_handler(move || {
@@ -47,14 +72,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut state = ctx.create_state().expect("failed to create state");
    let mut out = OpenOptions::new().create(true).append(true).open(&output_path)?;
    let mut counter: u32 = 0;
-    eprintln!("Transcribing {} → {}  (Ctrl+C to stop)", audio_arg, output_path);
+    eprintln!("Transcribing {} → {}  (clips → {})  (Ctrl+C to stop)", audio_arg, output_path, CLIP_DIR);
    if let Some(source) = audio_arg.strip_prefix("pulse:") {
        let mut stream = audio::LiveStream::open(source)?;
        while running.load(Ordering::SeqCst) {
            match stream.next_chunk(CHUNK_SECS)? {
-                Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out)?,
+                Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out, &mut counter)?,
                None => break,
            }
        }
@@ -65,7 +91,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                if !running.load(Ordering::SeqCst) {
                    break;
                }
-                transcribe_chunk(&mut state, chunk, &mut out)?;
+                transcribe_chunk(&mut state, chunk, &mut out, &mut counter)?;
            }
        }
    }