Improve dithering CPU usage (#866)

This commit is contained in:
Roderick van Domburg 2021-10-19 22:33:04 +02:00 committed by GitHub
parent b125659e12
commit 4c89a721ee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 40 additions and 12 deletions

View file

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Include build profile in the displayed version information
- [playback] Improve dithering CPU usage by about 33%
### Fixed
- [connect] Partly fix behavior after last track of an album/playlist

14
Cargo.lock generated
View file

@ -1288,6 +1288,7 @@ dependencies = [
"portaudio-rs",
"rand",
"rand_distr",
"rand_xoshiro",
"rodio",
"sdl2",
"shell-words",
@ -1881,9 +1882,9 @@ dependencies = [
[[package]]
name = "rand_distr"
version = "0.4.1"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "051b398806e42b9cd04ad9ec8f81e355d0a382c543ac6672c62f5a5b452ef142"
checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f"
dependencies = [
"num-traits",
"rand",
@ -1898,6 +1899,15 @@ dependencies = [
"rand_core",
]
[[package]]
name = "rand_xoshiro"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
dependencies = [
"rand_core",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"

View file

@ -49,6 +49,7 @@ ogg = "0.8"
# Dithering
rand = "0.8"
rand_distr = "0.4"
rand_xoshiro = "0.6"
[features]
alsa-backend = ["alsa"]

View file

@ -1,4 +1,4 @@
use rand::rngs::ThreadRng;
use rand::SeedableRng;
use rand_distr::{Distribution, Normal, Triangular, Uniform};
use std::fmt;
@ -41,20 +41,36 @@ impl fmt::Display for dyn Ditherer {
}
}
// Implementation note: we save the handle to ThreadRng so it doesn't require
// a lookup on each call (which is on each sample!). This is ~2.5x as fast.
// Downside is that it is not Send so we cannot move it around player threads.
// `SmallRng` is 33% faster than `ThreadRng`, but we can do even better.
// `SmallRng` defaults to `Xoshiro256PlusPlus` on 64-bit platforms and
// `Xoshiro128PlusPlus` on 32-bit platforms. These are excellent for the
// general case. Since we only generate 64-bit floating-point values, we can make
// some optimizations. Compared to `SmallRng`, these hand-picked generators
// improve performance by another 9% on 64-bit platforms and 2% on 32-bit
// platforms.
//
// For reference, see https://prng.di.unimi.it. Note that we do not use
// `Xoroshiro128Plus` or `Xoshiro128Plus` because they display low linear
// complexity in the lower four bits, which is not what we want:
// linearization is the very point of dithering.
// Targets with 64-bit pointers use `Xoshiro256Plus`.
#[cfg(target_pointer_width = "64")]
type Rng = rand_xoshiro::Xoshiro256Plus;
// Every other pointer width uses `Xoshiro128StarStar`.
#[cfg(not(target_pointer_width = "64"))]
type Rng = rand_xoshiro::Xoshiro128StarStar;
/// Builds a fresh instance of the platform-selected `Rng`.
///
/// The ditherer constructors call this once and cache the result, so the
/// seeding cost is paid at construction time rather than per sample.
/// Seeding goes through `from_entropy`, which is provided by the
/// `rand::SeedableRng` trait imported at the top of this file.
// NOTE(review): per the `rand` docs `from_entropy` panics if the entropy
// source is unavailable -- confirm that is acceptable for callers.
fn create_rng() -> Rng {
Rng::from_entropy()
}
/// Ditherer state backed by a `Triangular<f64>` distribution.
///
/// Holds the cached random number generator together with the
/// distribution it is paired with.
pub struct TriangularDitherer {
cached_rng: ThreadRng,
cached_rng: Rng,
distribution: Triangular<f64>,
}
impl Ditherer for TriangularDitherer {
fn new() -> Self {
Self {
cached_rng: rand::thread_rng(),
cached_rng: create_rng(),
// 2 LSB peak-to-peak needed to linearize the response:
distribution: Triangular::new(-1.0, 1.0, 0.0).unwrap(),
}
@ -74,14 +90,14 @@ impl TriangularDitherer {
}
/// Ditherer state backed by a `Normal<f64>` distribution.
///
/// Holds the cached random number generator together with the
/// distribution it is paired with.
pub struct GaussianDitherer {
cached_rng: ThreadRng,
cached_rng: Rng,
distribution: Normal<f64>,
}
impl Ditherer for GaussianDitherer {
fn new() -> Self {
Self {
cached_rng: rand::thread_rng(),
cached_rng: create_rng(),
// 1/2 LSB RMS needed to linearize the response:
distribution: Normal::new(0.0, 0.5).unwrap(),
}
@ -103,7 +119,7 @@ impl GaussianDitherer {
/// Ditherer state backed by a `Uniform<f64>` distribution.
///
/// Besides the cached random number generator and the distribution, it
/// keeps an `active_channel` index and a `previous_noises` array with
/// `NUM_CHANNELS` entries.
// NOTE(review): presumably one remembered noise value per audio channel,
// indexed by `active_channel` -- confirm against the `noise` implementation.
pub struct HighPassDitherer {
active_channel: usize,
previous_noises: [f64; NUM_CHANNELS],
cached_rng: ThreadRng,
cached_rng: Rng,
distribution: Uniform<f64>,
}
@ -112,7 +128,7 @@ impl Ditherer for HighPassDitherer {
Self {
active_channel: 0,
previous_noises: [0.0; NUM_CHANNELS],
cached_rng: rand::thread_rng(),
cached_rng: create_rng(),
distribution: Uniform::new_inclusive(-0.5, 0.5), // 1 LSB +/- 1 LSB (previous) = 2 LSB
}
}