Minor refactor

This commit is contained in:
surya 2024-03-09 19:56:11 +05:30
parent b504fc57e0
commit 44ae93fc06
2 changed files with 39 additions and 38 deletions

View File

@ -68,7 +68,7 @@ The terms character set is used interchangably with character encoding and code
# Common encoding schemes #
- An encoding scheme will encode the number to one or more bytes.
- *An encoding scheme will encode the number to one or more bytes.
## Multi byte encoding schemes ##
@ -81,14 +81,13 @@ The terms character set is used interchangably with character encoding and code
- 99% of the internet uses this encoding scheme.
| Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits
| Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits |
|----------|----------|----------|----------|----------------|
| 0xxxxxxx | - | - | - | 7 |
| 110xxxxx | 10xxxxxx | - | - | 11 |
| 1110xxxx | 10xxxxxx | 10xxxxxx | - | 16 |
| 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx | 21 |
```
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │
@ -116,7 +115,6 @@ The terms character set is used interchangably with character encoding and code
## Multi byte encoding schemes ##
### UTF - 16 ###
- Variable byte encoding scheme.
@ -211,7 +209,6 @@ The terms character set is used interchangably with character encoding and code
- Some languages (eg: C#) will return the number of utf-16 bytes to encode the complete string.
- The below emoji is of length 1 in python and length 4 in c#.
```
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │

View File

@ -1,8 +1,8 @@
use clap::{Parser, Args};
use clap::{Args, Parser};
use serde::Serialize;
use tabled::{
builder::Builder,
settings::{Modify, object::Rows, Alignment, Style}
settings::{object::Rows, Alignment, Modify, Style},
};
struct StringDetail {
@ -16,16 +16,16 @@ struct CharacterDetail {
byte: u8,
}
impl StringDetail{
fn parse_utf8(query: &String) -> Self {
let mut details:StringDetail = StringDetail::default();
impl StringDetail {
fn parse_utf8(query: &str) -> Self {
let mut details: StringDetail = StringDetail::default();
for i in query.chars() {
let mut bytes = [0; 4];
i.encode_utf8(&mut bytes);
let encoded = i.encode_utf8(&mut bytes);
details.push(Some(i), bytes[0]);
for b in 1..i.len_utf8() {
details.push(None, bytes[b]);
let mut citer = vec![i].into_iter();
for b in encoded.bytes() {
details.push(citer.next(), b);
}
}
@ -33,13 +33,14 @@ impl StringDetail{
}
fn parse_utf16(query: &String) -> Self {
let mut details:StringDetail = StringDetail::default();
let mut details: StringDetail = StringDetail::default();
for i in query.chars() {
let mut bytes = [0; 2];
i.encode_utf16(&mut bytes);
details.push_utf16(Some(i), bytes[0]);
if bytes[1] != 0x00000000 {
details.push_utf16(None, bytes[1]);
let encoded = i.encode_utf16(&mut bytes);
let mut citer = vec![i].into_iter();
for b in encoded {
details.push_utf16(citer.next(), *b);
}
}
@ -47,20 +48,22 @@ impl StringDetail{
}
fn default() -> Self {
Self { characters: Vec::new(), length: 0 }
Self {
characters: Vec::new(),
length: 0,
}
}
fn push(&mut self, character:Option<char>, byte:u8){
self.characters
.push(CharacterDetail {
byte_index: self.length,
character,
byte,
});
fn push(&mut self, character: Option<char>, byte: u8) {
self.characters.push(CharacterDetail {
byte_index: self.length,
character,
byte,
});
self.length += 1;
}
fn push_utf16(&mut self, character:Option<char>, byte: u16){
fn push_utf16(&mut self, character: Option<char>, byte: u16) {
let bytes = byte.to_be_bytes();
self.push(character, bytes[0]);
self.push(None, bytes[1]);
@ -96,8 +99,7 @@ impl StringTableRow {
unicode = String::from(format!("{}", x as u32));
unicode_hex = String::from(format!("{:x}", x as u32));
}
None => {
}
None => {}
};
let byte = format!("{}", char_detail.byte_index);
let hex = format!("{:02x}", char_detail.byte);
@ -111,7 +113,7 @@ impl StringTableRow {
byte,
hex,
dec,
bin
bin,
}
}
@ -141,10 +143,12 @@ impl StringTableRow {
}
impl StringTable {
fn from (string_details: &StringDetail) -> Self {
let characters = string_details.characters.iter()
.map(StringTableRow::from)
.collect::<Vec<StringTableRow>>();
fn from(string_details: &StringDetail) -> Self {
let characters = string_details
.characters
.iter()
.map(StringTableRow::from)
.collect::<Vec<StringTableRow>>();
StringTable {
characters,
@ -159,7 +163,8 @@ impl StringTable {
table_builder.push_record(i.to_table_row());
}
let table = table_builder.build()
let table = table_builder
.build()
.with(Style::sharp())
.with(Modify::new(Rows::new(1..)).with(Alignment::left()))
.to_string();
@ -198,12 +203,11 @@ struct InspectArgs {
utf16: bool,
}
fn main() {
let cli = CliArgs::parse();
let details = match cli.inspect.utf8 {
true => StringDetail::parse_utf8(&cli.name),
false => StringDetail::parse_utf16(&cli.name)
false => StringDetail::parse_utf16(&cli.name),
};
let char_table = StringTable::from(&details);