diff --git a/learn/slides.md b/learn/slides.md index 4694d19..e1bc8b7 100755 --- a/learn/slides.md +++ b/learn/slides.md @@ -68,7 +68,7 @@ The terms character set is used interchangably with character encoding and code # Common encoding schemes # -- An encoding scheme will encode the number to one or more bytes. +- *An encoding scheme will encode the number to one or more bytes. ## Multi byte encoding schemes ## @@ -81,14 +81,13 @@ The terms character set is used interchangably with character encoding and code - 99% of the internet uses this encoding scheme. - | Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits + | Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits | |----------|----------|----------|----------|----------------| | 0xxxxxxx | - | - | - | 7 | | 110xxxxx | 10xxxxxx | - | - | 11 | | 1110xxxx | 10xxxxxx | 10xxxxxx | - | 16 | | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx | 21 | - ``` ┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐ │ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │ @@ -116,7 +115,6 @@ The terms character set is used interchangably with character encoding and code ## Multi byte encoding schemes ## - ### UTF - 16 ### - Variable byte encoding scheme. @@ -211,7 +209,6 @@ The terms character set is used interchangably with character encoding and code - Some languages (eg: C#) will return the number of utf-16 bytes to encode the complete string. - The below emoji is of length 1 in python and length 4 in c#. - ``` ┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐ │ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │ diff --git a/src/main.rs b/src/main.rs index 2fb5306..643f516 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ -use clap::{Parser, Args}; +use clap::{Args, Parser}; use serde::Serialize; use tabled::{ builder::Builder, - settings::{Modify, object::Rows, Alignment, Style} + settings::{object::Rows, Alignment, Modify, Style}, }; struct StringDetail { @@ -16,16 +16,16 @@ struct CharacterDetail { byte: u8, } -impl StringDetail{ - fn parse_utf8(query: &String) -> Self { - let mut details:StringDetail = StringDetail::default(); +impl StringDetail { + fn parse_utf8(query: &str) -> Self { + let mut details: StringDetail = StringDetail::default(); for i in query.chars() { let mut bytes = [0; 4]; - i.encode_utf8(&mut bytes); + let encoded = i.encode_utf8(&mut bytes); - details.push(Some(i), bytes[0]); - for b in 1..i.len_utf8() { - details.push(None, bytes[b]); + let mut citer = vec![i].into_iter(); + for b in encoded.bytes() { + details.push(citer.next(), b); } } @@ -33,13 +33,14 @@ impl StringDetail{ } fn parse_utf16(query: &String) -> Self { - let mut details:StringDetail = StringDetail::default(); + let mut details: StringDetail = StringDetail::default(); for i in query.chars() { let mut bytes = [0; 2]; - i.encode_utf16(&mut bytes); - details.push_utf16(Some(i), bytes[0]); - if bytes[1] != 0x00000000 { - details.push_utf16(None, bytes[1]); + let encoded = i.encode_utf16(&mut bytes); + + let mut citer = vec![i].into_iter(); + for b in encoded { + details.push_utf16(citer.next(), *b); } } @@ -47,20 +48,22 @@ impl StringDetail{ } fn default() -> Self { - Self { characters: Vec::new(), length: 0 } + Self { + characters: Vec::new(), + length: 0, + } } - fn push(&mut self, character:Option, byte:u8){ - self.characters - .push(CharacterDetail { - byte_index: self.length, - character, - byte, - }); + fn push(&mut self, character: Option, byte: u8) { + self.characters.push(CharacterDetail { + byte_index: self.length, + character, + byte, + }); self.length += 1; } - fn push_utf16(&mut self, character:Option, byte: u16){ + fn push_utf16(&mut self, character: Option, byte: u16) { let bytes = byte.to_be_bytes(); self.push(character, bytes[0]); self.push(None, bytes[1]); @@ -96,8 +99,7 @@ impl StringTableRow { unicode = String::from(format!("{}", x as u32)); unicode_hex = String::from(format!("{:x}", x as u32)); } - None => { - } + None => {} }; let byte = format!("{}", char_detail.byte_index); let hex = format!("{:02x}", char_detail.byte); @@ -111,7 +113,7 @@ impl StringTableRow { byte, hex, dec, - bin + bin, } } @@ -141,10 +143,12 @@ impl StringTableRow { } impl StringTable { - fn from (string_details: &StringDetail) -> Self { - let characters = string_details.characters.iter() - .map(StringTableRow::from) - .collect::>(); + fn from(string_details: &StringDetail) -> Self { + let characters = string_details + .characters + .iter() + .map(StringTableRow::from) + .collect::>(); StringTable { characters, @@ -159,7 +163,8 @@ impl StringTable { table_builder.push_record(i.to_table_row()); } - let table = table_builder.build() + let table = table_builder + .build() .with(Style::sharp()) .with(Modify::new(Rows::new(1..)).with(Alignment::left())) .to_string(); @@ -198,12 +203,11 @@ struct InspectArgs { utf16: bool, } - fn main() { let cli = CliArgs::parse(); let details = match cli.inspect.utf8 { true => StringDetail::parse_utf8(&cli.name), - false => StringDetail::parse_utf16(&cli.name) + false => StringDetail::parse_utf16(&cli.name), }; let char_table = StringTable::from(&details);