Quantum-realm level normalisation optimization (#965)

This saves up to 1-2% CPU usage on a Pi 4, depending on how much normalisation is actually being done.

* We don't need to test against EPSILON. The factor will never be over 1.0 in basic normalisation mode.
* Don't check the normalisation mode EVERY sample.
* Do as little math as possible by simplifying all equations as much as possible (while still retaining the textbook equations in comments).
* Misc cleanup
This commit is contained in:
Jason Gray 2022-02-13 15:50:32 -06:00 committed by GitHub
parent cb194cfd3b
commit 616809b64c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 96 additions and 97 deletions

View file

@ -23,14 +23,15 @@ pub struct Converter {
impl Converter { impl Converter {
pub fn new(dither_config: Option<DithererBuilder>) -> Self { pub fn new(dither_config: Option<DithererBuilder>) -> Self {
if let Some(ref ditherer_builder) = dither_config { match dither_config {
let ditherer = (ditherer_builder)(); Some(ditherer_builder) => {
info!("Converting with ditherer: {}", ditherer.name()); let ditherer = (ditherer_builder)();
Self { info!("Converting with ditherer: {}", ditherer.name());
ditherer: Some(ditherer), Self {
ditherer: Some(ditherer),
}
} }
} else { None => Self { ditherer: None },
Self { ditherer: None }
} }
} }
@ -52,18 +53,15 @@ impl Converter {
const SCALE_S16: f64 = 32768.; const SCALE_S16: f64 = 32768.;
pub fn scale(&mut self, sample: f64, factor: f64) -> f64 { pub fn scale(&mut self, sample: f64, factor: f64) -> f64 {
let dither = match self.ditherer {
Some(ref mut d) => d.noise(),
None => 0.0,
};
// From the many float to int conversion methods available, match what // From the many float to int conversion methods available, match what
// the reference Vorbis implementation uses: sample * 32768 (for 16 bit) // the reference Vorbis implementation uses: sample * 32768 (for 16 bit)
let int_value = sample * factor + dither;
// Casting float to integer rounds towards zero by default, i.e. it // Casting float to integer rounds towards zero by default, i.e. it
// truncates, and that generates larger error than rounding to nearest. // truncates, and that generates larger error than rounding to nearest.
int_value.round() match self.ditherer.as_mut() {
Some(d) => (sample * factor + d.noise()).round(),
None => (sample * factor).round(),
}
} }
// Special case for samples packed in a word of greater bit depth (e.g. // Special case for samples packed in a word of greater bit depth (e.g.
@ -79,11 +77,12 @@ impl Converter {
let max = factor - 1.0; let max = factor - 1.0;
if int_value < min { if int_value < min {
return min; min
} else if int_value > max { } else if int_value > max {
return max; max
} else {
int_value
} }
int_value
} }
pub fn f64_to_f32(&mut self, samples: &[f64]) -> Vec<f32> { pub fn f64_to_f32(&mut self, samples: &[f64]) -> Vec<f32> {
@ -109,12 +108,7 @@ impl Converter {
pub fn f64_to_s24_3(&mut self, samples: &[f64]) -> Vec<i24> { pub fn f64_to_s24_3(&mut self, samples: &[f64]) -> Vec<i24> {
samples samples
.iter() .iter()
.map(|sample| { .map(|sample| i24::from_s24(self.clamping_scale(*sample, Self::SCALE_S24) as i32))
// Not as DRY as calling f32_to_s24 first, but this saves iterating
// over all samples twice.
let int_value = self.clamping_scale(*sample, Self::SCALE_S24) as i32;
i24::from_s24(int_value)
})
.collect() .collect()
} }

View file

@ -3,7 +3,7 @@ use rand::SeedableRng;
use rand_distr::{Distribution, Normal, Triangular, Uniform}; use rand_distr::{Distribution, Normal, Triangular, Uniform};
use std::fmt; use std::fmt;
const NUM_CHANNELS: usize = 2; use crate::NUM_CHANNELS;
// Dithering lowers digital-to-analog conversion ("requantization") error, // Dithering lowers digital-to-analog conversion ("requantization") error,
// linearizing output, lowering distortion and replacing it with a constant, // linearizing output, lowering distortion and replacing it with a constant,
@ -102,7 +102,7 @@ impl GaussianDitherer {
pub struct HighPassDitherer { pub struct HighPassDitherer {
active_channel: usize, active_channel: usize,
previous_noises: [f64; NUM_CHANNELS], previous_noises: [f64; NUM_CHANNELS as usize],
cached_rng: SmallRng, cached_rng: SmallRng,
distribution: Uniform<f64>, distribution: Uniform<f64>,
} }
@ -111,7 +111,7 @@ impl Ditherer for HighPassDitherer {
fn new() -> Self { fn new() -> Self {
Self { Self {
active_channel: 0, active_channel: 0,
previous_noises: [0.0; NUM_CHANNELS], previous_noises: [0.0; NUM_CHANNELS as usize],
cached_rng: create_rng(), cached_rng: create_rng(),
distribution: Uniform::new_inclusive(-0.5, 0.5), // 1 LSB +/- 1 LSB (previous) = 2 LSB distribution: Uniform::new_inclusive(-0.5, 0.5), // 1 LSB +/- 1 LSB (previous) = 2 LSB
} }

View file

@ -760,7 +760,16 @@ impl PlayerTrackLoader {
position_ms: u32, position_ms: u32,
) -> Option<PlayerLoadedTrackData> { ) -> Option<PlayerLoadedTrackData> {
let audio = match AudioItem::get_audio_item(&self.session, spotify_id).await { let audio = match AudioItem::get_audio_item(&self.session, spotify_id).await {
Ok(audio) => audio, Ok(audio) => match self.find_available_alternative(audio).await {
Some(audio) => audio,
None => {
warn!(
"<{}> is not available",
spotify_id.to_uri().unwrap_or_default()
);
return None;
}
},
Err(e) => { Err(e) => {
error!("Unable to load audio item: {:?}", e); error!("Unable to load audio item: {:?}", e);
return None; return None;
@ -769,17 +778,6 @@ impl PlayerTrackLoader {
info!("Loading <{}> with Spotify URI <{}>", audio.name, audio.uri); info!("Loading <{}> with Spotify URI <{}>", audio.name, audio.uri);
let audio = match self.find_available_alternative(audio).await {
Some(audio) => audio,
None => {
warn!(
"<{}> is not available",
spotify_id.to_uri().unwrap_or_default()
);
return None;
}
};
if audio.duration < 0 { if audio.duration < 0 {
error!( error!(
"Track duration for <{}> cannot be {}", "Track duration for <{}> cannot be {}",
@ -809,26 +807,24 @@ impl PlayerTrackLoader {
], ],
}; };
let entry = formats.iter().find_map(|format| { let (format, file_id) =
if let Some(&file_id) = audio.files.get(format) { match formats
Some((*format, file_id)) .iter()
} else { .find_map(|format| match audio.files.get(format) {
None Some(&file_id) => Some((*format, file_id)),
} _ => None,
}); }) {
Some(t) => t,
let (format, file_id) = match entry { None => {
Some(t) => t, warn!("<{}> is not available in any supported format", audio.name);
None => { return None;
warn!("<{}> is not available in any supported format", audio.name); }
return None; };
}
};
let bytes_per_second = self.stream_data_rate(format); let bytes_per_second = self.stream_data_rate(format);
let play_from_beginning = position_ms == 0; let play_from_beginning = position_ms == 0;
// This is only a loop to be able to reload the file if an error occured // This is only a loop to be able to reload the file if an error occurred
// while opening a cached file. // while opening a cached file.
loop { loop {
let encrypted_file = AudioFile::open( let encrypted_file = AudioFile::open(
@ -1321,25 +1317,30 @@ impl PlayerInternal {
// For the basic normalisation method, a normalisation factor of 1.0 indicates that // For the basic normalisation method, a normalisation factor of 1.0 indicates that
// there is nothing to normalise (all samples should pass unaltered). For the // there is nothing to normalise (all samples should pass unaltered). For the
// dynamic method, there may still be peaks that we want to shave off. // dynamic method, there may still be peaks that we want to shave off.
if self.config.normalisation if self.config.normalisation {
&& !(f64::abs(normalisation_factor - 1.0) <= f64::EPSILON if self.config.normalisation_method == NormalisationMethod::Basic
&& self.config.normalisation_method == NormalisationMethod::Basic) && normalisation_factor < 1.0
{ {
// zero-cost shorthands for sample in data.iter_mut() {
let threshold_db = self.config.normalisation_threshold_dbfs; *sample *= normalisation_factor;
let knee_db = self.config.normalisation_knee_db; }
let attack_cf = self.config.normalisation_attack_cf; } else if self.config.normalisation_method
let release_cf = self.config.normalisation_release_cf; == NormalisationMethod::Dynamic
{
// zero-cost shorthands
let threshold_db = self.config.normalisation_threshold_dbfs;
let knee_db = self.config.normalisation_knee_db;
let attack_cf = self.config.normalisation_attack_cf;
let release_cf = self.config.normalisation_release_cf;
for sample in data.iter_mut() { for sample in data.iter_mut() {
*sample *= normalisation_factor; // for both the basic and dynamic limiter *sample *= normalisation_factor;
// Feedforward limiter in the log domain
// After: Giannoulis, D., Massberg, M., & Reiss, J.D. (2012). Digital Dynamic
// Range Compressor Design—A Tutorial and Analysis. Journal of The Audio
// Engineering Society, 60, 399-408.
// Feedforward limiter in the log domain
// After: Giannoulis, D., Massberg, M., & Reiss, J.D. (2012). Digital Dynamic
// Range Compressor Design—A Tutorial and Analysis. Journal of The Audio
// Engineering Society, 60, 399-408.
if self.config.normalisation_method == NormalisationMethod::Dynamic
{
// Some tracks have samples that are precisely 0.0. That's silence // Some tracks have samples that are precisely 0.0. That's silence
// and we know we don't need to limit that, in which we can spare // and we know we don't need to limit that, in which we can spare
// the CPU cycles. // the CPU cycles.
@ -1348,22 +1349,26 @@ impl PlayerInternal {
// peak detector stuck. Also catch the unlikely case where a sample // peak detector stuck. Also catch the unlikely case where a sample
// is decoded as `NaN` or some other non-normal value. // is decoded as `NaN` or some other non-normal value.
let limiter_db = if sample.is_normal() { let limiter_db = if sample.is_normal() {
// step 1-2: half-wave rectification and conversion into dB // step 1-4: half-wave rectification and conversion into dB
let abs_sample_db = ratio_to_db(sample.abs()); // and gain computer with soft knee and subtractor
let bias_db = ratio_to_db(sample.abs()) - threshold_db;
// step 3-4: gain computer with soft knee and subtractor
let bias_db = abs_sample_db - threshold_db;
let knee_boundary_db = bias_db * 2.0; let knee_boundary_db = bias_db * 2.0;
if knee_boundary_db < -knee_db { if knee_boundary_db < -knee_db {
0.0 0.0
} else if knee_boundary_db.abs() <= knee_db { } else if knee_boundary_db.abs() <= knee_db {
abs_sample_db // The textbook equation:
- (abs_sample_db // ratio_to_db(sample.abs()) - (ratio_to_db(sample.abs()) - (bias_db + knee_db / 2.0).powi(2) / (2.0 * knee_db))
- (bias_db + knee_db / 2.0).powi(2) // Simplifies to:
/ (2.0 * knee_db)) // ((2.0 * bias_db) + knee_db).powi(2) / (8.0 * knee_db)
// Which in our case further simplifies to:
// (knee_boundary_db + knee_db).powi(2) / (8.0 * knee_db)
// because knee_boundary_db is 2.0 * bias_db.
(knee_boundary_db + knee_db).powi(2) / (8.0 * knee_db)
} else { } else {
abs_sample_db - threshold_db // Textbook:
// ratio_to_db(sample.abs()) - threshold_db, which is already our bias_db.
bias_db
} }
} else { } else {
0.0 0.0
@ -1377,14 +1382,24 @@ impl PlayerInternal {
|| self.normalisation_peak > 0.0 || self.normalisation_peak > 0.0
{ {
// step 5: smooth, decoupled peak detector // step 5: smooth, decoupled peak detector
// Textbook:
// release_cf * self.normalisation_integrator + (1.0 - release_cf) * limiter_db
// Simplifies to:
// release_cf * self.normalisation_integrator - release_cf * limiter_db + limiter_db
self.normalisation_integrator = f64::max( self.normalisation_integrator = f64::max(
limiter_db, limiter_db,
release_cf * self.normalisation_integrator release_cf * self.normalisation_integrator
+ (1.0 - release_cf) * limiter_db, - release_cf * limiter_db
+ limiter_db,
); );
// Textbook:
// attack_cf * self.normalisation_peak + (1.0 - attack_cf) * self.normalisation_integrator
// Simplifies to:
// attack_cf * self.normalisation_peak - attack_cf * self.normalisation_integrator + self.normalisation_integrator
self.normalisation_peak = attack_cf self.normalisation_peak = attack_cf
* self.normalisation_peak * self.normalisation_peak
+ (1.0 - attack_cf) * self.normalisation_integrator; - attack_cf * self.normalisation_integrator
+ self.normalisation_integrator;
// step 6: make-up gain applied later (volume attenuation) // step 6: make-up gain applied later (volume attenuation)
// Applying the standard normalisation factor here won't work, // Applying the standard normalisation factor here won't work,
@ -1897,15 +1912,8 @@ impl PlayerInternal {
} }
fn send_event(&mut self, event: PlayerEvent) { fn send_event(&mut self, event: PlayerEvent) {
let mut index = 0; self.event_senders
while index < self.event_senders.len() { .retain(|sender| sender.send(event.clone()).is_ok());
match self.event_senders[index].send(event.clone()) {
Ok(_) => index += 1,
Err(_) => {
self.event_senders.remove(index);
}
}
}
} }
fn load_track( fn load_track(
@ -2079,10 +2087,7 @@ impl<T: Read + Seek> Seek for Subfile<T> {
}; };
let newpos = self.stream.seek(pos)?; let newpos = self.stream.seek(pos)?;
if newpos > self.offset {
Ok(newpos - self.offset) Ok(newpos.saturating_sub(self.offset))
} else {
Ok(0)
}
} }
} }