listener: introduce clips feature

This commit is contained in:
2026-05-10 19:02:54 -04:00
parent beccc7b50b
commit 37bc07f667
4 changed files with 44 additions and 7 deletions
+3
View File
@@ -28,3 +28,6 @@ models/
# Transcription text file # Transcription text file
transcription.txt transcription.txt
# Clips dir
audio_clips/
+7
View File
@@ -208,6 +208,12 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "hound"
version = "3.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
[[package]] [[package]]
name = "iana-time-zone" name = "iana-time-zone"
version = "0.1.65" version = "0.1.65"
@@ -420,6 +426,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"chrono", "chrono",
"ctrlc", "ctrlc",
"hound",
"whisper-rs", "whisper-rs",
] ]
+1
View File
@@ -6,4 +6,5 @@ edition = "2024"
[dependencies] [dependencies]
chrono = "0.4.44" chrono = "0.4.44"
ctrlc = "3.5.2" ctrlc = "3.5.2"
hound = "3.5.1"
whisper-rs = "0.16.0" whisper-rs = "0.16.0"
+32 -6
View File
@@ -1,4 +1,4 @@
use std::fs::OpenOptions; use std::fs::{self, OpenOptions};
use std::io::Write; use std::io::Write;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc; use std::sync::Arc;
@@ -9,18 +9,41 @@ mod audio;
const CHUNK_SECS: u32 = 30; const CHUNK_SECS: u32 = 30;
const CHUNK_SAMPLES: usize = 16000 * CHUNK_SECS as usize; const CHUNK_SAMPLES: usize = 16000 * CHUNK_SECS as usize;
const CLIP_DIR: &str = "./audio_clips";
/// Writes `samples` to `path` as a mono, 16 kHz, 16-bit integer PCM WAV file.
///
/// Each input sample is clamped to `[-1.0, 1.0]`, scaled by `i16::MAX`, and
/// cast to `i16` (the cast drops the fractional part) before being written.
///
/// # Errors
///
/// Propagates any error returned by `hound` while creating the file, writing
/// a sample, or finalizing the writer.
fn save_clip(samples: &[f32], path: &str) -> Result<(), Box<dyn std::error::Error>> {
    let full_scale = i16::MAX as f32;

    let mut wav = hound::WavWriter::create(
        path,
        hound::WavSpec {
            channels: 1,
            sample_rate: 16000,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        },
    )?;

    // Convert each float sample to 16-bit PCM and stop at the first write error.
    samples
        .iter()
        .map(|&sample| (sample.clamp(-1.0, 1.0) * full_scale) as i16)
        .try_for_each(|pcm| wav.write_sample(pcm))?;

    // Finalize explicitly so that a failure here is reported to the caller.
    wav.finalize()?;
    Ok(())
}
fn transcribe_chunk( fn transcribe_chunk(
state: &mut whisper_rs::WhisperState, state: &mut whisper_rs::WhisperState,
chunk: &[f32], chunk: &[f32],
out: &mut impl Write, out: &mut impl Write,
counter: &mut u32,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
let time: DateTime<Local> = Local::now();
let id = format!("{}_{:04}", time.format("%Y%m%d_%H%M%S"), counter);
*counter += 1;
let clip_path = format!("{}/{}.wav", CLIP_DIR, id);
save_clip(chunk, &clip_path)?;
let params = FullParams::new(SamplingStrategy::BeamSearch { let params = FullParams::new(SamplingStrategy::BeamSearch {
beam_size: 5, beam_size: 5,
patience: -1.0, patience: -1.0,
}); });
let time: DateTime<Local> = Local::now(); out.write_all(format!("[{}] [{}]: ", time, id).as_bytes())?;
out.write_all(format!("[{}]: ", time.to_string()).as_bytes())?;
state.full(params, chunk)?; state.full(params, chunk)?;
for segment in state.as_iter() { for segment in state.as_iter() {
let line = format!("{}\n", segment); let line = format!("{}\n", segment);
@@ -36,6 +59,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let audio_arg = std::env::args().nth(2).expect("usage: listener <model> <file.wav|pulse:SOURCE> [output.txt]"); let audio_arg = std::env::args().nth(2).expect("usage: listener <model> <file.wav|pulse:SOURCE> [output.txt]");
let output_path = std::env::args().nth(3).unwrap_or_else(|| "transcription.txt".to_string()); let output_path = std::env::args().nth(3).unwrap_or_else(|| "transcription.txt".to_string());
fs::create_dir_all(CLIP_DIR)?;
let running = Arc::new(AtomicBool::new(true)); let running = Arc::new(AtomicBool::new(true));
let r = running.clone(); let r = running.clone();
ctrlc::set_handler(move || { ctrlc::set_handler(move || {
@@ -47,14 +72,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut state = ctx.create_state().expect("failed to create state"); let mut state = ctx.create_state().expect("failed to create state");
let mut out = OpenOptions::new().create(true).append(true).open(&output_path)?; let mut out = OpenOptions::new().create(true).append(true).open(&output_path)?;
let mut counter: u32 = 0;
eprintln!("Transcribing {}{} (Ctrl+C to stop)", audio_arg, output_path); eprintln!("Transcribing {}{} (clips → {}) (Ctrl+C to stop)", audio_arg, output_path, CLIP_DIR);
if let Some(source) = audio_arg.strip_prefix("pulse:") { if let Some(source) = audio_arg.strip_prefix("pulse:") {
let mut stream = audio::LiveStream::open(source)?; let mut stream = audio::LiveStream::open(source)?;
while running.load(Ordering::SeqCst) { while running.load(Ordering::SeqCst) {
match stream.next_chunk(CHUNK_SECS)? { match stream.next_chunk(CHUNK_SECS)? {
Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out)?, Some(chunk) => transcribe_chunk(&mut state, &chunk, &mut out, &mut counter)?,
None => break, None => break,
} }
} }
@@ -65,7 +91,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
if !running.load(Ordering::SeqCst) { if !running.load(Ordering::SeqCst) {
break; break;
} }
transcribe_chunk(&mut state, chunk, &mut out)?; transcribe_chunk(&mut state, chunk, &mut out, &mut counter)?;
} }
} }
} }