Refactor SpotifyId

- perf:
  * base62 encoding is an order of magnitude faster (~20x);
  * base16/62 enc/dec and from_uri are several times faster (~2-20x);
  * Let FileId::to_base16() reuse the hex encoder (~20x);

- changes:
  * Add to_uri() method;
  * Make from_uri() error handling consistent;
  * Move audio type from string matching to a SpotifyAudioType factory (private);
  * Implement From/Into<&str> for SpotifyAudioType;
  * Add representation sizes as associated constants (private);

- cs/docs:
  * Add rudimentary docs for most public funcs;
  * Add trivial test cases for the codecs;
This commit is contained in:
Michał Chojnacki 2020-04-10 16:00:42 +02:00 committed by johannesd3
parent ed20f357dc
commit 6b09836117

View file

@ -8,6 +8,44 @@ pub enum SpotifyAudioType {
NonPlayable,
}
impl SpotifyAudioType {
fn from_str(src: &str) -> SpotifyAudioType {
match src {
"track" => SpotifyAudioType::Track,
"episode" => SpotifyAudioType::Podcast,
_ => SpotifyAudioType::NonPlayable,
}
}
fn to_str(self) -> &'static str {
match self {
SpotifyAudioType::Track => "track",
SpotifyAudioType::Podcast => "episode",
SpotifyAudioType::NonPlayable => "unknown",
}
}
fn len(self) -> usize {
match self {
SpotifyAudioType::Track => 5,
SpotifyAudioType::Podcast => 7,
SpotifyAudioType::NonPlayable => 7,
}
}
}
impl std::convert::From<&str> for SpotifyAudioType {
fn from(v: &str) -> Self {
SpotifyAudioType::from_str(v)
}
}
impl std::convert::Into<&str> for SpotifyAudioType {
fn into(self) -> &'static str {
self.to_str()
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SpotifyId {
pub id: u128,
@ -17,104 +55,184 @@ pub struct SpotifyId {
/// Error returned by the fallible `SpotifyId` constructors in this file
/// (`from_base16`, `from_base62`, `from_raw` and `from_uri`).
///
/// It is a unit struct, so it carries no detail about what was wrong with the input.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct SpotifyIdError;
const BASE62_DIGITS: &'static [u8] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const BASE16_DIGITS: &'static [u8] = b"0123456789abcdef";
const BASE62_DIGITS: &[u8; 62] = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const BASE16_DIGITS: &[u8; 16] = b"0123456789abcdef";
impl SpotifyId {
const SIZE: usize = 16;
const SIZE_BASE16: usize = 32;
const SIZE_BASE62: usize = 22;
fn as_track(n: u128) -> SpotifyId {
SpotifyId {
id: n.to_owned(),
id: n,
audio_type: SpotifyAudioType::Track,
}
}
pub fn from_base16(id: &str) -> Result<SpotifyId, SpotifyIdError> {
let data = id.as_bytes();
/// Parses a base16 (hex) encoded [Spotify ID] into a `SpotifyId`.
///
/// `src` is expected to be 32 bytes long and encoded using valid characters.
///
/// [Spotify ID]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
pub fn from_base16(src: &str) -> Result<SpotifyId, SpotifyIdError> {
let mut dst: u128 = 0;
let mut n = 0u128;
for c in data {
let d = match BASE16_DIGITS.iter().position(|e| e == c) {
None => return Err(SpotifyIdError),
Some(x) => x as u128,
};
n = n * 16;
n = n + d;
for c in src.as_bytes() {
let p = match c {
b'0'..=b'9' => c - b'0',
b'a'..=b'f' => c - b'a' + 10,
_ => return Err(SpotifyIdError),
} as u128;
dst <<= 4;
dst += p;
}
Ok(SpotifyId::as_track(n))
Ok(SpotifyId::as_track(dst))
}
pub fn from_base62(id: &str) -> Result<SpotifyId, SpotifyIdError> {
let data = id.as_bytes();
/// Parses a base62 encoded [Spotify ID] into a `SpotifyId`.
///
/// `src` is expected to be 22 bytes long and encoded using valid characters.
///
/// [Spotify ID]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
pub fn from_base62(src: &str) -> Result<SpotifyId, SpotifyIdError> {
let mut dst: u128 = 0;
let mut n = 0u128;
for c in data {
let d = match BASE62_DIGITS.iter().position(|e| e == c) {
None => return Err(SpotifyIdError),
Some(x) => x as u128,
};
n = n * 62;
n = n + d;
for c in src.as_bytes() {
let p = match c {
b'0'..=b'9' => c - b'0',
b'a'..=b'z' => c - b'a' + 10,
b'A'..=b'Z' => c - b'A' + 36,
_ => return Err(SpotifyIdError),
} as u128;
dst *= 62;
dst += p;
}
Ok(SpotifyId::as_track(n))
Ok(SpotifyId::as_track(dst))
}
pub fn from_raw(data: &[u8]) -> Result<SpotifyId, SpotifyIdError> {
if data.len() != 16 {
/// Creates a `SpotifyId` from a copy of `SpotifyId::SIZE` (16) bytes in big-endian order.
///
/// The resulting `SpotifyId` will default to a `SpotifyAudioType::Track`.
pub fn from_raw(src: &[u8]) -> Result<SpotifyId, SpotifyIdError> {
if src.len() != SpotifyId::SIZE {
return Err(SpotifyIdError);
};
let mut arr: [u8; 16] = Default::default();
arr.copy_from_slice(&data[0..16]);
let mut dst = [0u8; SpotifyId::SIZE];
dst.copy_from_slice(src);
Ok(SpotifyId::as_track(u128::from_be_bytes(arr)))
Ok(SpotifyId::as_track(u128::from_be_bytes(dst)))
}
pub fn from_uri(uri: &str) -> Result<SpotifyId, SpotifyIdError> {
let parts = uri.split(":").collect::<Vec<&str>>();
let gid = parts.last().unwrap();
if uri.contains(":episode:") {
let mut spotify_id = SpotifyId::from_base62(gid).unwrap();
let _ = std::mem::replace(&mut spotify_id.audio_type, SpotifyAudioType::Podcast);
Ok(spotify_id)
} else if uri.contains(":track:") {
SpotifyId::from_base62(gid)
} else {
// show/playlist/artist/album/??
let mut spotify_id = SpotifyId::from_base62(gid).unwrap();
let _ = std::mem::replace(&mut spotify_id.audio_type, SpotifyAudioType::NonPlayable);
Ok(spotify_id)
/// Parses a [Spotify URI] into a `SpotifyId`.
///
/// `uri` is expected to be in the canonical form `spotify:{type}:{id}`, where `{type}`
/// can be arbitrary while `{id}` is a 22-character long, base62 encoded Spotify ID.
/// The audio type is derived from `{type}` through `From<&str> for SpotifyAudioType`.
///
/// Only the layout is checked: the first 8 bytes are skipped as the prefix without
/// being compared against `"spotify:"`.
///
/// # Errors
///
/// Returns `SpotifyIdError` (never panics) when:
/// * the input is too short to hold an 8-byte prefix, a colon and a 22-byte ID;
/// * the byte in front of the ID is not a colon;
/// * the ID is rejected by `SpotifyId::from_base62`;
/// * the type segment cannot be sliced because byte 8 is not a character boundary.
///
/// [Spotify URI]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
pub fn from_uri(src: &str) -> Result<SpotifyId, SpotifyIdError> {
    // Length of the "spotify:" prefix that precedes the type segment.
    const PREFIX_LEN: usize = 8;

    // We expect the ID to be the last colon-delimited item in the URI.
    let b = src.as_bytes();
    // `id_i` is where the ID starts. It must leave room for the prefix and the colon
    // separating type and ID, otherwise the index arithmetic below would underflow.
    let id_i = match b.len().checked_sub(SpotifyId::SIZE_BASE62) {
        Some(i) if i > PREFIX_LEN => i,
        _ => return Err(SpotifyIdError),
    };
    if b[id_i - 1] != b':' {
        return Err(SpotifyIdError);
    }

    // The byte before `id_i` is an ASCII colon, so `id_i` is a valid char boundary.
    let mut id = SpotifyId::from_base62(&src[id_i..])?;

    // Byte 8 may fall inside a multi-byte character on malformed input, hence `get`.
    let kind = src.get(PREFIX_LEN..id_i - 1).ok_or(SpotifyIdError)?;
    id.audio_type = kind.into();

    Ok(id)
}
/// Returns the `SpotifyId` as a base16 (hex) encoded, `SpotifyId::SIZE_BASE16` (32)
/// character long `String`.
pub fn to_base16(&self) -> String {
format!("{:032x}", self.id)
to_base16(&self.to_raw(), &mut [0u8; SpotifyId::SIZE_BASE16])
}
/// Returns the `SpotifyId` as a [canonically] base62 encoded, `SpotifyId::SIZE_BASE62` (22)
/// character long `String`.
///
/// [canonically]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
pub fn to_base62(&self) -> String {
let &SpotifyId { id: mut n, .. } = self;
let mut dst = [0u8; 22];
let mut i = 0;
let n = self.id;
let mut data = [0u8; 22];
for i in 0..22 {
data[21 - i] = BASE62_DIGITS[(n % 62) as usize];
n /= 62;
// The algorithm is based on:
// https://github.com/trezor/trezor-crypto/blob/c316e775a2152db255ace96b6b65ac0f20525ec0/base58.c
//
// We are not using naive division of self.id as it is an u128 and div + mod are software
// emulated at runtime (and unoptimized into mul + shift) on non-128bit platforms,
// making them very expensive.
//
// Trezor's algorithm allows us to stick to arithmetic on native registers making this
// an order of magnitude faster. Additionally, as our sizes are known, instead of
// dealing with the ID on a byte by byte basis, we decompose it into four u32s and
// use 64-bit arithmetic on them for an additional speedup.
for shift in &[96, 64, 32, 0] {
let mut carry = (n >> shift) as u32 as u64;
for b in &mut dst[..i] {
carry += (*b as u64) << 32;
*b = (carry % 62) as u8;
carry /= 62;
}
while carry > 0 {
dst[i] = (carry % 62) as u8;
carry /= 62;
i += 1;
}
}
std::str::from_utf8(&data).unwrap().to_owned()
}
for b in &mut dst {
*b = BASE62_DIGITS[*b as usize];
}
pub fn to_uri(&self) -> String {
match self.audio_type {
SpotifyAudioType::Track => format!("spotify:track:{}", self.to_base62()),
SpotifyAudioType::Podcast => format!("spotify:episode:{}", self.to_base62()),
SpotifyAudioType::NonPlayable => format!("spotify:unknown:{}", self.to_base62()),
dst.reverse();
unsafe {
// Safety: We are only dealing with ASCII characters.
String::from_utf8_unchecked(dst.to_vec())
}
}
pub fn to_raw(&self) -> [u8; 16] {
/// Serializes the 128-bit ID into `SpotifyId::SIZE` (16) bytes, most significant byte
/// first.
///
/// The array is returned by value; the `SpotifyId` itself is left untouched.
pub fn to_raw(&self) -> [u8; SpotifyId::SIZE] {
    u128::to_be_bytes(self.id)
}
/// Formats the `SpotifyId` as a [Spotify URI] of the canonical shape
/// `spotify:{type}:{id}`.
///
/// `{type}` is the string the audio type converts into and `{id}` is the output of
/// `to_base62`, i.e. the 22-character base62 form of the ID.
///
/// An audio type the library does not recognize is written as `unknown`.
///
/// [Spotify URI]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
pub fn to_uri(&self) -> String {
    let kind: &str = self.audio_type.into();
    let id = self.to_base62();

    // Fixed part: "spotify:" (8) + separating colon (1) + base62 ID (22) = 31 bytes.
    // The type segment is the only variable-length piece.
    let mut uri = String::with_capacity(31 + self.audio_type.len());
    uri.push_str("spotify:");
    uri.push_str(kind);
    uri.push(':');
    uri.push_str(&id);
    uri
}
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -122,11 +240,7 @@ pub struct FileId(pub [u8; 20]);
impl FileId {
pub fn to_base16(&self) -> String {
self.0
.iter()
.map(|b| format!("{:02x}", b))
.collect::<Vec<String>>()
.concat()
to_base16(&self.0, &mut [0u8; 40])
}
}
@ -141,3 +255,185 @@ impl fmt::Display for FileId {
f.write_str(&self.to_base16())
}
}
/// Hex-encodes `src` using lowercase digits and returns the result as a `String`.
///
/// `buf` is scratch space provided by the caller; only its first `2 * src.len()` bytes
/// are written and only those bytes end up in the returned `String`. Any extra bytes at
/// the end of `buf` are ignored, so their contents can never reach the unchecked UTF-8
/// conversion below.
///
/// # Panics
///
/// Panics if `buf` is shorter than `2 * src.len()` bytes.
#[inline]
fn to_base16(src: &[u8], buf: &mut [u8]) -> String {
    // Narrow the buffer to exactly the region we are going to fill.
    let out = &mut buf[..src.len() * 2];

    for (pair, &v) in out.chunks_exact_mut(2).zip(src) {
        pair[0] = BASE16_DIGITS[(v >> 4) as usize];
        pair[1] = BASE16_DIGITS[(v & 0x0f) as usize];
    }

    // SAFETY: `out` holds exactly `2 * src.len()` bytes and the loop above overwrote
    // every one of them with an entry of `BASE16_DIGITS`, all of which are ASCII.
    unsafe { String::from_utf8_unchecked(out.to_vec()) }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One ID expressed in every representation the codecs handle.
    ///
    /// For entries of `CONV_VALID` all fields describe the same ID. For entries of
    /// `CONV_INVALID` each encoded field is malformed on purpose and `id`/`kind` are
    /// placeholders.
    struct ConversionCase {
        id: u128,
        kind: SpotifyAudioType,
        uri: &'static str,
        base16: &'static str,
        base62: &'static str,
        raw: &'static [u8],
    }

    /// Well-formed cases; the last three share one ID and differ only in audio type.
    static CONV_VALID: [ConversionCase; 4] = [
        ConversionCase {
            id: 238762092608182713602505436543891614649,
            kind: SpotifyAudioType::Track,
            uri: "spotify:track:5sWHDYs0csV6RS48xBl0tH",
            base16: "b39fe8081e1f4c54be38e8d6f9f12bb9",
            base62: "5sWHDYs0csV6RS48xBl0tH",
            raw: &[
                179, 159, 232, 8, 30, 31, 76, 84, 190, 56, 232, 214, 249, 241, 43, 185,
            ],
        },
        ConversionCase {
            id: 204841891221366092811751085145916697048,
            kind: SpotifyAudioType::Track,
            uri: "spotify:track:4GNcXTGWmnZ3ySrqvol3o4",
            base16: "9a1b1cfbc6f244569ae0356c77bbe9d8",
            base62: "4GNcXTGWmnZ3ySrqvol3o4",
            raw: &[
                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216,
            ],
        },
        ConversionCase {
            id: 204841891221366092811751085145916697048,
            kind: SpotifyAudioType::Podcast,
            uri: "spotify:episode:4GNcXTGWmnZ3ySrqvol3o4",
            base16: "9a1b1cfbc6f244569ae0356c77bbe9d8",
            base62: "4GNcXTGWmnZ3ySrqvol3o4",
            raw: &[
                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216,
            ],
        },
        ConversionCase {
            id: 204841891221366092811751085145916697048,
            kind: SpotifyAudioType::NonPlayable,
            uri: "spotify:unknown:4GNcXTGWmnZ3ySrqvol3o4",
            base16: "9a1b1cfbc6f244569ae0356c77bbe9d8",
            base62: "4GNcXTGWmnZ3ySrqvol3o4",
            raw: &[
                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216,
            ],
        },
    ];

    /// Malformed cases; every decoder is expected to answer `Err(SpotifyIdError)`.
    static CONV_INVALID: [ConversionCase; 2] = [
        ConversionCase {
            id: 0,
            kind: SpotifyAudioType::NonPlayable,
            // Invalid ID in the URI.
            uri: "spotify:arbitrarywhatever:5sWHDYs0Bl0tH",
            base16: "ZZZZZ8081e1f4c54be38e8d6f9f12bb9",
            base62: "!!!!!Ys0csV6RS48xBl0tH",
            raw: &[
                // Invalid length.
                154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 5, 3, 108, 119, 187, 233, 216, 255,
            ],
        },
        ConversionCase {
            id: 0,
            kind: SpotifyAudioType::NonPlayable,
            // Missing colon between ID and type.
            uri: "spotify:arbitrarywhatever5sWHDYs0csV6RS48xBl0tH",
            base16: "--------------------",
            base62: "....................",
            raw: &[
                // Invalid length.
                154, 27, 28, 251,
            ],
        },
    ];

    #[test]
    fn from_base62() {
        for c in &CONV_VALID {
            assert_eq!(SpotifyId::from_base62(c.base62).unwrap().id, c.id);
        }

        for c in &CONV_INVALID {
            assert_eq!(SpotifyId::from_base62(c.base62), Err(SpotifyIdError));
        }
    }

    #[test]
    fn to_base62() {
        for c in &CONV_VALID {
            let id = SpotifyId {
                id: c.id,
                audio_type: c.kind,
            };

            assert_eq!(id.to_base62(), c.base62);
        }
    }

    #[test]
    fn from_base16() {
        for c in &CONV_VALID {
            assert_eq!(SpotifyId::from_base16(c.base16).unwrap().id, c.id);
        }

        for c in &CONV_INVALID {
            assert_eq!(SpotifyId::from_base16(c.base16), Err(SpotifyIdError));
        }
    }

    #[test]
    fn to_base16() {
        for c in &CONV_VALID {
            let id = SpotifyId {
                id: c.id,
                audio_type: c.kind,
            };

            assert_eq!(id.to_base16(), c.base16);
        }
    }

    #[test]
    fn from_uri() {
        for c in &CONV_VALID {
            let actual = SpotifyId::from_uri(c.uri).unwrap();

            assert_eq!(actual.id, c.id);
            assert_eq!(actual.audio_type, c.kind);
        }

        for c in &CONV_INVALID {
            assert_eq!(SpotifyId::from_uri(c.uri), Err(SpotifyIdError));
        }
    }

    #[test]
    fn to_uri() {
        for c in &CONV_VALID {
            let id = SpotifyId {
                id: c.id,
                audio_type: c.kind,
            };

            assert_eq!(id.to_uri(), c.uri);
        }
    }

    #[test]
    fn from_raw() {
        for c in &CONV_VALID {
            assert_eq!(SpotifyId::from_raw(c.raw).unwrap().id, c.id);
        }

        for c in &CONV_INVALID {
            assert_eq!(SpotifyId::from_raw(c.raw), Err(SpotifyIdError));
        }
    }

    // Encoding counterpart of `from_raw`; the other codecs are all tested in both
    // directions.
    #[test]
    fn to_raw() {
        for c in &CONV_VALID {
            let id = SpotifyId {
                id: c.id,
                audio_type: c.kind,
            };

            assert_eq!(&id.to_raw()[..], c.raw);
        }
    }
}