From 6b0983611750e55346cb4f5c27d3b5470eb6a98f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Chojnacki?= Date: Fri, 10 Apr 2020 16:00:42 +0200 Subject: [PATCH] Refactor SpotifyId - perf: * base62 encoding is an order of magnitude faster (~20x); * base16/62 enc/dec and from_uri are several times faster (~2-20x); * Let FileId::to_base16() reuse the hex encoder (~20x); - changes: * Add to_uri() method; * Make from_uri() error handling consistent; * Move audio type from string matching to a SpotifyAudioType factory (private); * Implement From/Into<&str> for SpotifyAudioType; * Add representation sizes as associated constants (private); - cs/docs: * Add rudimentary docs for most public funcs; * Add trivial test cases for the codecs; --- core/src/spotify_id.rs | 424 ++++++++++++++++++++++++++++++++++------- 1 file changed, 360 insertions(+), 64 deletions(-) diff --git a/core/src/spotify_id.rs b/core/src/spotify_id.rs index 0982f9cb..6adc0206 100644 --- a/core/src/spotify_id.rs +++ b/core/src/spotify_id.rs @@ -8,6 +8,44 @@ pub enum SpotifyAudioType { NonPlayable, } +impl SpotifyAudioType { + fn from_str(src: &str) -> SpotifyAudioType { + match src { + "track" => SpotifyAudioType::Track, + "episode" => SpotifyAudioType::Podcast, + _ => SpotifyAudioType::NonPlayable, + } + } + + fn to_str(self) -> &'static str { + match self { + SpotifyAudioType::Track => "track", + SpotifyAudioType::Podcast => "episode", + SpotifyAudioType::NonPlayable => "unknown", + } + } + + fn len(self) -> usize { + match self { + SpotifyAudioType::Track => 5, + SpotifyAudioType::Podcast => 7, + SpotifyAudioType::NonPlayable => 7, + } + } +} + +impl std::convert::From<&str> for SpotifyAudioType { + fn from(v: &str) -> Self { + SpotifyAudioType::from_str(v) + } +} + +impl std::convert::Into<&str> for SpotifyAudioType { + fn into(self) -> &'static str { + self.to_str() + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SpotifyId { pub id: u128, @@ -17,104 +55,184 @@ pub struct
SpotifyId { #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct SpotifyIdError; -const BASE62_DIGITS: &'static [u8] = - b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; -const BASE16_DIGITS: &'static [u8] = b"0123456789abcdef"; +const BASE62_DIGITS: &[u8; 62] = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; +const BASE16_DIGITS: &[u8; 16] = b"0123456789abcdef"; impl SpotifyId { + const SIZE: usize = 16; + const SIZE_BASE16: usize = 32; + const SIZE_BASE62: usize = 22; + fn as_track(n: u128) -> SpotifyId { SpotifyId { - id: n.to_owned(), + id: n, audio_type: SpotifyAudioType::Track, } } - pub fn from_base16(id: &str) -> Result { - let data = id.as_bytes(); + /// Parses a base16 (hex) encoded [Spotify ID] into a `SpotifyId`. + /// + /// `src` is expected to be 32 bytes long and encoded using valid characters. + /// + /// [Spotify ID]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids + pub fn from_base16(src: &str) -> Result { + let mut dst: u128 = 0; - let mut n = 0u128; - for c in data { - let d = match BASE16_DIGITS.iter().position(|e| e == c) { - None => return Err(SpotifyIdError), - Some(x) => x as u128, - }; - n = n * 16; - n = n + d; + for c in src.as_bytes() { + let p = match c { + b'0'..=b'9' => c - b'0', + b'a'..=b'f' => c - b'a' + 10, + _ => return Err(SpotifyIdError), + } as u128; + + dst <<= 4; + dst += p; } - Ok(SpotifyId::as_track(n)) + Ok(SpotifyId::as_track(dst)) } - pub fn from_base62(id: &str) -> Result { - let data = id.as_bytes(); + /// Parses a base62 encoded [Spotify ID] into a `SpotifyId`. + /// + /// `src` is expected to be 22 bytes long and encoded using valid characters. 
+ /// + /// [Spotify ID]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids + pub fn from_base62(src: &str) -> Result { + let mut dst: u128 = 0; - let mut n = 0u128; - for c in data { - let d = match BASE62_DIGITS.iter().position(|e| e == c) { - None => return Err(SpotifyIdError), - Some(x) => x as u128, - }; - n = n * 62; - n = n + d; + for c in src.as_bytes() { + let p = match c { + b'0'..=b'9' => c - b'0', + b'a'..=b'z' => c - b'a' + 10, + b'A'..=b'Z' => c - b'A' + 36, + _ => return Err(SpotifyIdError), + } as u128; + + dst *= 62; + dst += p; } - Ok(SpotifyId::as_track(n)) + + Ok(SpotifyId::as_track(dst)) } - pub fn from_raw(data: &[u8]) -> Result { - if data.len() != 16 { + /// Creates a `SpotifyId` from a copy of `SpotifyId::SIZE` (16) bytes in big-endian order. + /// + /// The resulting `SpotifyId` will default to a `SpotifyAudioType::Track`. + pub fn from_raw(src: &[u8]) -> Result { + if src.len() != SpotifyId::SIZE { return Err(SpotifyIdError); }; - let mut arr: [u8; 16] = Default::default(); - arr.copy_from_slice(&data[0..16]); + let mut dst = [0u8; SpotifyId::SIZE]; + dst.copy_from_slice(src); - Ok(SpotifyId::as_track(u128::from_be_bytes(arr))) + Ok(SpotifyId::as_track(u128::from_be_bytes(dst))) } - pub fn from_uri(uri: &str) -> Result { - let parts = uri.split(":").collect::>(); - let gid = parts.last().unwrap(); - if uri.contains(":episode:") { - let mut spotify_id = SpotifyId::from_base62(gid).unwrap(); - let _ = std::mem::replace(&mut spotify_id.audio_type, SpotifyAudioType::Podcast); - Ok(spotify_id) - } else if uri.contains(":track:") { - SpotifyId::from_base62(gid) - } else { - // show/playlist/artist/album/?? - let mut spotify_id = SpotifyId::from_base62(gid).unwrap(); - let _ = std::mem::replace(&mut spotify_id.audio_type, SpotifyAudioType::NonPlayable); - Ok(spotify_id) + /// Parses a [Spotify URI] into a `SpotifyId`.
+ /// + /// `src` is expected to be in the canonical form `spotify:{type}:{id}`, where `{type}` + /// can be arbitrary while `{id}` is a 22-character long, base62 encoded Spotify ID. + /// + /// [Spotify URI]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids + pub fn from_uri(src: &str) -> Result { + // We expect the ID to be the last colon-delimited item in the URI. + let b = src.as_bytes(); + let id_i = b.len() - SpotifyId::SIZE_BASE62; + if b[id_i - 1] != b':' { + return Err(SpotifyIdError); } + + let mut id = match SpotifyId::from_base62(&src[id_i..]) { + Ok(v) => v, + Err(e) => return Err(e), + }; + + // Slice offset by 8 as we are skipping the "spotify:" prefix. + id.audio_type = src[8..id_i - 1].into(); + + Ok(id) } + /// Returns the `SpotifyId` as a base16 (hex) encoded, `SpotifyId::SIZE_BASE16` (32) + /// character long `String`. pub fn to_base16(&self) -> String { - format!("{:032x}", self.id) + to_base16(&self.to_raw(), &mut [0u8; SpotifyId::SIZE_BASE16]) } + /// Returns the `SpotifyId` as a [canonically] base62 encoded, `SpotifyId::SIZE_BASE62` (22) + /// character long `String`. + /// + /// [canonically]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids pub fn to_base62(&self) -> String { - let &SpotifyId { id: mut n, .. } = self; + let mut dst = [0u8; 22]; + let mut i = 0; + let n = self.id; - let mut data = [0u8; 22]; - for i in 0..22 { - data[21 - i] = BASE62_DIGITS[(n % 62) as usize]; - n /= 62; + // The algorithm is based on: + // https://github.com/trezor/trezor-crypto/blob/c316e775a2152db255ace96b6b65ac0f20525ec0/base58.c + // + // We are not using naive division of self.id as it is an u128 and div + mod are software + // emulated at runtime (and unoptimized into mul + shift) on non-128bit platforms, + // making them very expensive. + // + // Trezor's algorithm allows us to stick to arithmetic on native registers making this + // an order of magnitude faster.
Additionally, as our sizes are known, instead of + // dealing with the ID on a byte by byte basis, we decompose it into four u32s and + // use 64-bit arithmetic on them for an additional speedup. + for shift in &[96, 64, 32, 0] { + let mut carry = (n >> shift) as u32 as u64; + + for b in &mut dst[..i] { + carry += (*b as u64) << 32; + *b = (carry % 62) as u8; + carry /= 62; + } + + while carry > 0 { + dst[i] = (carry % 62) as u8; + carry /= 62; + i += 1; + } } - std::str::from_utf8(&data).unwrap().to_owned() - } + for b in &mut dst { + *b = BASE62_DIGITS[*b as usize]; + } - pub fn to_uri(&self) -> String { - match self.audio_type { - SpotifyAudioType::Track => format!("spotify:track:{}", self.to_base62()), - SpotifyAudioType::Podcast => format!("spotify:episode:{}", self.to_base62()), - SpotifyAudioType::NonPlayable => format!("spotify:unknown:{}", self.to_base62()), + dst.reverse(); + + unsafe { + // Safety: We are only dealing with ASCII characters. + String::from_utf8_unchecked(dst.to_vec()) } } - pub fn to_raw(&self) -> [u8; 16] { + /// Returns a copy of the `SpotifyId` as an array of `SpotifyId::SIZE` (16) bytes in + /// big-endian order. + pub fn to_raw(&self) -> [u8; SpotifyId::SIZE] { self.id.to_be_bytes() } + + /// Returns the `SpotifyId` as a [Spotify URI] in the canonical form `spotify:{type}:{id}`, + /// where `{type}` is an arbitrary string and `{id}` is a 22-character long, base62 encoded + /// Spotify ID. + /// + /// If the `SpotifyId` has an associated type unrecognized by the library, `{type}` will + /// be encoded as `unknown`. + /// + /// [Spotify URI]: https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids + pub fn to_uri(&self) -> String { + // 8 chars for the "spotify:" prefix + 1 colon + 22 chars base62 encoded ID = 31 + // + unknown size audio_type. 
+ let mut dst = String::with_capacity(31 + self.audio_type.len()); + dst.push_str("spotify:"); + dst.push_str(self.audio_type.into()); + dst.push_str(":"); + dst.push_str(&self.to_base62()); + + dst + } } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -122,11 +240,7 @@ pub struct FileId(pub [u8; 20]); impl FileId { pub fn to_base16(&self) -> String { - self.0 - .iter() - .map(|b| format!("{:02x}", b)) - .collect::>() - .concat() + to_base16(&self.0, &mut [0u8; 40]) } } @@ -141,3 +255,185 @@ impl fmt::Display for FileId { f.write_str(&self.to_base16()) } } + +#[inline] +fn to_base16(src: &[u8], buf: &mut [u8]) -> String { + let mut i = 0; + for v in src { + buf[i] = BASE16_DIGITS[(v >> 4) as usize]; + buf[i + 1] = BASE16_DIGITS[(v & 0x0f) as usize]; + i += 2; + } + + unsafe { + // Safety: We are only dealing with ASCII characters. + String::from_utf8_unchecked(buf.to_vec()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + struct ConversionCase { + id: u128, + kind: SpotifyAudioType, + uri: &'static str, + base16: &'static str, + base62: &'static str, + raw: &'static [u8], + } + + static CONV_VALID: [ConversionCase; 4] = [ + ConversionCase { + id: 238762092608182713602505436543891614649, + kind: SpotifyAudioType::Track, + uri: "spotify:track:5sWHDYs0csV6RS48xBl0tH", + base16: "b39fe8081e1f4c54be38e8d6f9f12bb9", + base62: "5sWHDYs0csV6RS48xBl0tH", + raw: &[ + 179, 159, 232, 8, 30, 31, 76, 84, 190, 56, 232, 214, 249, 241, 43, 185, + ], + }, + ConversionCase { + id: 204841891221366092811751085145916697048, + kind: SpotifyAudioType::Track, + uri: "spotify:track:4GNcXTGWmnZ3ySrqvol3o4", + base16: "9a1b1cfbc6f244569ae0356c77bbe9d8", + base62: "4GNcXTGWmnZ3ySrqvol3o4", + raw: &[ + 154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216, + ], + }, + ConversionCase { + id: 204841891221366092811751085145916697048, + kind: SpotifyAudioType::Podcast, + uri: "spotify:episode:4GNcXTGWmnZ3ySrqvol3o4", + base16: 
"9a1b1cfbc6f244569ae0356c77bbe9d8", + base62: "4GNcXTGWmnZ3ySrqvol3o4", + raw: &[ + 154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216, + ], + }, + ConversionCase { + id: 204841891221366092811751085145916697048, + kind: SpotifyAudioType::NonPlayable, + uri: "spotify:unknown:4GNcXTGWmnZ3ySrqvol3o4", + base16: "9a1b1cfbc6f244569ae0356c77bbe9d8", + base62: "4GNcXTGWmnZ3ySrqvol3o4", + raw: &[ + 154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 53, 108, 119, 187, 233, 216, + ], + }, + ]; + + static CONV_INVALID: [ConversionCase; 2] = [ + ConversionCase { + id: 0, + kind: SpotifyAudioType::NonPlayable, + // Invalid ID in the URI. + uri: "spotify:arbitrarywhatever:5sWHDYs0Bl0tH", + base16: "ZZZZZ8081e1f4c54be38e8d6f9f12bb9", + base62: "!!!!!Ys0csV6RS48xBl0tH", + raw: &[ + // Invalid length. + 154, 27, 28, 251, 198, 242, 68, 86, 154, 224, 5, 3, 108, 119, 187, 233, 216, 255, + ], + }, + ConversionCase { + id: 0, + kind: SpotifyAudioType::NonPlayable, + // Missing colon between ID and type. + uri: "spotify:arbitrarywhatever5sWHDYs0csV6RS48xBl0tH", + base16: "--------------------", + base62: "....................", + raw: &[ + // Invalid length. 
+ 154, 27, 28, 251, + ], + }, + ]; + + #[test] + fn from_base62() { + for c in &CONV_VALID { + assert_eq!(SpotifyId::from_base62(c.base62).unwrap().id, c.id); + } + + for c in &CONV_INVALID { + assert_eq!(SpotifyId::from_base62(c.base62), Err(SpotifyIdError)); + } + } + + #[test] + fn to_base62() { + for c in &CONV_VALID { + let id = SpotifyId { + id: c.id, + audio_type: c.kind, + }; + + assert_eq!(id.to_base62(), c.base62); + } + } + + #[test] + fn from_base16() { + for c in &CONV_VALID { + assert_eq!(SpotifyId::from_base16(c.base16).unwrap().id, c.id); + } + + for c in &CONV_INVALID { + assert_eq!(SpotifyId::from_base16(c.base16), Err(SpotifyIdError)); + } + } + + #[test] + fn to_base16() { + for c in &CONV_VALID { + let id = SpotifyId { + id: c.id, + audio_type: c.kind, + }; + + assert_eq!(id.to_base16(), c.base16); + } + } + + #[test] + fn from_uri() { + for c in &CONV_VALID { + let actual = SpotifyId::from_uri(c.uri).unwrap(); + + assert_eq!(actual.id, c.id); + assert_eq!(actual.audio_type, c.kind); + } + + for c in &CONV_INVALID { + assert_eq!(SpotifyId::from_uri(c.uri), Err(SpotifyIdError)); + } + } + + #[test] + fn to_uri() { + for c in &CONV_VALID { + let id = SpotifyId { + id: c.id, + audio_type: c.kind, + }; + + assert_eq!(id.to_uri(), c.uri); + } + } + + #[test] + fn from_raw() { + for c in &CONV_VALID { + assert_eq!(SpotifyId::from_raw(c.raw).unwrap().id, c.id); + } + + for c in &CONV_INVALID { + assert_eq!(SpotifyId::from_raw(c.raw), Err(SpotifyIdError)); + } + } +}