Minor refactor

This commit is contained in:
surya 2024-03-09 19:56:11 +05:30
parent b504fc57e0
commit 44ae93fc06
2 changed files with 39 additions and 38 deletions

View File

@ -68,7 +68,7 @@ The terms character set is used interchangeably with character encoding and code
# Common encoding schemes # # Common encoding schemes #
- An encoding scheme will encode the number to one or more bytes. - *An encoding scheme will encode the number to one or more bytes.
## Multi byte encoding schemes ## ## Multi byte encoding schemes ##
@ -81,14 +81,13 @@ The terms character set is used interchangeably with character encoding and code
- 99% of the internet uses this encoding scheme. - 99% of the internet uses this encoding scheme.
| Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits | Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits |
|----------|----------|----------|----------|----------------| |----------|----------|----------|----------|----------------|
| 0xxxxxxx | - | - | - | 7 | | 0xxxxxxx | - | - | - | 7 |
| 110xxxxx | 10xxxxxx | - | - | 11 | | 110xxxxx | 10xxxxxx | - | - | 11 |
| 1110xxxx | 10xxxxxx | 10xxxxxx | - | 16 | | 1110xxxx | 10xxxxxx | 10xxxxxx | - | 16 |
| 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx | 21 | | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx | 21 |
``` ```
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐ ┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │ │ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │
@ -116,7 +115,6 @@ The terms character set is used interchangeably with character encoding and code
## Multi byte encoding schemes ## ## Multi byte encoding schemes ##
### UTF - 16 ### ### UTF - 16 ###
- Variable byte encoding scheme. - Variable byte encoding scheme.
@ -211,7 +209,6 @@ The terms character set is used interchangeably with character encoding and code
- Some languages (eg: C#) will return the number of utf-16 bytes to encode the complete string. - Some languages (eg: C#) will return the number of utf-16 bytes to encode the complete string.
- The below emoji is of length 1 in python and length 4 in c#. - The below emoji is of length 1 in python and length 4 in c#.
``` ```
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐ ┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │ │ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │

View File

@ -1,8 +1,8 @@
use clap::{Parser, Args}; use clap::{Args, Parser};
use serde::Serialize; use serde::Serialize;
use tabled::{ use tabled::{
builder::Builder, builder::Builder,
settings::{Modify, object::Rows, Alignment, Style} settings::{object::Rows, Alignment, Modify, Style},
}; };
struct StringDetail { struct StringDetail {
@ -17,15 +17,15 @@ struct CharacterDetail {
} }
impl StringDetail { impl StringDetail {
fn parse_utf8(query: &String) -> Self { fn parse_utf8(query: &str) -> Self {
let mut details: StringDetail = StringDetail::default(); let mut details: StringDetail = StringDetail::default();
for i in query.chars() { for i in query.chars() {
let mut bytes = [0; 4]; let mut bytes = [0; 4];
i.encode_utf8(&mut bytes); let encoded = i.encode_utf8(&mut bytes);
details.push(Some(i), bytes[0]); let mut citer = vec![i].into_iter();
for b in 1..i.len_utf8() { for b in encoded.bytes() {
details.push(None, bytes[b]); details.push(citer.next(), b);
} }
} }
@ -36,10 +36,11 @@ impl StringDetail{
let mut details: StringDetail = StringDetail::default(); let mut details: StringDetail = StringDetail::default();
for i in query.chars() { for i in query.chars() {
let mut bytes = [0; 2]; let mut bytes = [0; 2];
i.encode_utf16(&mut bytes); let encoded = i.encode_utf16(&mut bytes);
details.push_utf16(Some(i), bytes[0]);
if bytes[1] != 0x00000000 { let mut citer = vec![i].into_iter();
details.push_utf16(None, bytes[1]); for b in encoded {
details.push_utf16(citer.next(), *b);
} }
} }
@ -47,12 +48,14 @@ impl StringDetail{
} }
fn default() -> Self { fn default() -> Self {
Self { characters: Vec::new(), length: 0 } Self {
characters: Vec::new(),
length: 0,
}
} }
fn push(&mut self, character: Option<char>, byte: u8) { fn push(&mut self, character: Option<char>, byte: u8) {
self.characters self.characters.push(CharacterDetail {
.push(CharacterDetail {
byte_index: self.length, byte_index: self.length,
character, character,
byte, byte,
@ -96,8 +99,7 @@ impl StringTableRow {
unicode = String::from(format!("{}", x as u32)); unicode = String::from(format!("{}", x as u32));
unicode_hex = String::from(format!("{:x}", x as u32)); unicode_hex = String::from(format!("{:x}", x as u32));
} }
None => { None => {}
}
}; };
let byte = format!("{}", char_detail.byte_index); let byte = format!("{}", char_detail.byte_index);
let hex = format!("{:02x}", char_detail.byte); let hex = format!("{:02x}", char_detail.byte);
@ -111,7 +113,7 @@ impl StringTableRow {
byte, byte,
hex, hex,
dec, dec,
bin bin,
} }
} }
@ -142,7 +144,9 @@ impl StringTableRow {
impl StringTable { impl StringTable {
fn from(string_details: &StringDetail) -> Self { fn from(string_details: &StringDetail) -> Self {
let characters = string_details.characters.iter() let characters = string_details
.characters
.iter()
.map(StringTableRow::from) .map(StringTableRow::from)
.collect::<Vec<StringTableRow>>(); .collect::<Vec<StringTableRow>>();
@ -159,7 +163,8 @@ impl StringTable {
table_builder.push_record(i.to_table_row()); table_builder.push_record(i.to_table_row());
} }
let table = table_builder.build() let table = table_builder
.build()
.with(Style::sharp()) .with(Style::sharp())
.with(Modify::new(Rows::new(1..)).with(Alignment::left())) .with(Modify::new(Rows::new(1..)).with(Alignment::left()))
.to_string(); .to_string();
@ -198,12 +203,11 @@ struct InspectArgs {
utf16: bool, utf16: bool,
} }
fn main() { fn main() {
let cli = CliArgs::parse(); let cli = CliArgs::parse();
let details = match cli.inspect.utf8 { let details = match cli.inspect.utf8 {
true => StringDetail::parse_utf8(&cli.name), true => StringDetail::parse_utf8(&cli.name),
false => StringDetail::parse_utf16(&cli.name) false => StringDetail::parse_utf16(&cli.name),
}; };
let char_table = StringTable::from(&details); let char_table = StringTable::from(&details);