Minor refactor
This commit is contained in:
parent
b504fc57e0
commit
44ae93fc06
@ -68,7 +68,7 @@ The terms character set is used interchangeably with character encoding and code
|
|||||||
|
|
||||||
# Common encoding schemes #
|
# Common encoding schemes #
|
||||||
|
|
||||||
- An encoding scheme will encode the number to one or more bytes.
|
- *An encoding scheme will encode the number to one or more bytes.
|
||||||
|
|
||||||
## Multi byte encoding schemes ##
|
## Multi byte encoding schemes ##
|
||||||
|
|
||||||
@ -81,14 +81,13 @@ The terms character set is used interchangeably with character encoding and code
|
|||||||
- 99% of the internet uses this encoding scheme.
|
- 99% of the internet uses this encoding scheme.
|
||||||
|
|
||||||
|
|
||||||
| Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits
|
| Byte 1 | Byte 2 | Byte 3 | Byte 4 | Available bits |
|
||||||
|----------|----------|----------|----------|----------------|
|
|----------|----------|----------|----------|----------------|
|
||||||
| 0xxxxxxx | - | - | - | 7 |
|
| 0xxxxxxx | - | - | - | 7 |
|
||||||
| 110xxxxx | 10xxxxxx | - | - | 11 |
|
| 110xxxxx | 10xxxxxx | - | - | 11 |
|
||||||
| 1110xxxx | 10xxxxxx | 10xxxxxx | - | 16 |
|
| 1110xxxx | 10xxxxxx | 10xxxxxx | - | 16 |
|
||||||
| 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx | 21 |
|
| 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx | 21 |
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
|
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
|
||||||
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │
|
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │
|
||||||
@ -116,7 +115,6 @@ The terms character set is used interchangeably with character encoding and code
|
|||||||
|
|
||||||
## Multi byte encoding schemes ##
|
## Multi byte encoding schemes ##
|
||||||
|
|
||||||
|
|
||||||
### UTF - 16 ###
|
### UTF - 16 ###
|
||||||
|
|
||||||
- Variable byte encoding scheme.
|
- Variable byte encoding scheme.
|
||||||
@ -211,7 +209,6 @@ The terms character set is used interchangeably with character encoding and code
|
|||||||
- Some languages (eg: C#) will return the number of utf-16 bytes to encode the complete string.
|
- Some languages (eg: C#) will return the number of utf-16 bytes to encode the complete string.
|
||||||
- The below emoji is of length 1 in python and length 4 in c#.
|
- The below emoji is of length 1 in python and length 4 in c#.
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
|
┌────────┬───────┬───────────┬──────┬─────┬─────┬──────────┐
|
||||||
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │
|
│ U+dec │ U+hex │ character │ byte │ hex │ dec │ bin │
|
||||||
|
|||||||
58
src/main.rs
58
src/main.rs
@ -1,8 +1,8 @@
|
|||||||
use clap::{Parser, Args};
|
use clap::{Args, Parser};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use tabled::{
|
use tabled::{
|
||||||
builder::Builder,
|
builder::Builder,
|
||||||
settings::{Modify, object::Rows, Alignment, Style}
|
settings::{object::Rows, Alignment, Modify, Style},
|
||||||
};
|
};
|
||||||
|
|
||||||
struct StringDetail {
|
struct StringDetail {
|
||||||
@ -16,16 +16,16 @@ struct CharacterDetail {
|
|||||||
byte: u8,
|
byte: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StringDetail{
|
impl StringDetail {
|
||||||
fn parse_utf8(query: &String) -> Self {
|
fn parse_utf8(query: &str) -> Self {
|
||||||
let mut details:StringDetail = StringDetail::default();
|
let mut details: StringDetail = StringDetail::default();
|
||||||
for i in query.chars() {
|
for i in query.chars() {
|
||||||
let mut bytes = [0; 4];
|
let mut bytes = [0; 4];
|
||||||
i.encode_utf8(&mut bytes);
|
let encoded = i.encode_utf8(&mut bytes);
|
||||||
|
|
||||||
details.push(Some(i), bytes[0]);
|
let mut citer = vec![i].into_iter();
|
||||||
for b in 1..i.len_utf8() {
|
for b in encoded.bytes() {
|
||||||
details.push(None, bytes[b]);
|
details.push(citer.next(), b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -33,13 +33,14 @@ impl StringDetail{
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_utf16(query: &String) -> Self {
|
fn parse_utf16(query: &String) -> Self {
|
||||||
let mut details:StringDetail = StringDetail::default();
|
let mut details: StringDetail = StringDetail::default();
|
||||||
for i in query.chars() {
|
for i in query.chars() {
|
||||||
let mut bytes = [0; 2];
|
let mut bytes = [0; 2];
|
||||||
i.encode_utf16(&mut bytes);
|
let encoded = i.encode_utf16(&mut bytes);
|
||||||
details.push_utf16(Some(i), bytes[0]);
|
|
||||||
if bytes[1] != 0x00000000 {
|
let mut citer = vec![i].into_iter();
|
||||||
details.push_utf16(None, bytes[1]);
|
for b in encoded {
|
||||||
|
details.push_utf16(citer.next(), *b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,12 +48,14 @@ impl StringDetail{
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { characters: Vec::new(), length: 0 }
|
Self {
|
||||||
|
characters: Vec::new(),
|
||||||
|
length: 0,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push(&mut self, character:Option<char>, byte:u8){
|
fn push(&mut self, character: Option<char>, byte: u8) {
|
||||||
self.characters
|
self.characters.push(CharacterDetail {
|
||||||
.push(CharacterDetail {
|
|
||||||
byte_index: self.length,
|
byte_index: self.length,
|
||||||
character,
|
character,
|
||||||
byte,
|
byte,
|
||||||
@ -60,7 +63,7 @@ impl StringDetail{
|
|||||||
self.length += 1;
|
self.length += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push_utf16(&mut self, character:Option<char>, byte: u16){
|
fn push_utf16(&mut self, character: Option<char>, byte: u16) {
|
||||||
let bytes = byte.to_be_bytes();
|
let bytes = byte.to_be_bytes();
|
||||||
self.push(character, bytes[0]);
|
self.push(character, bytes[0]);
|
||||||
self.push(None, bytes[1]);
|
self.push(None, bytes[1]);
|
||||||
@ -96,8 +99,7 @@ impl StringTableRow {
|
|||||||
unicode = String::from(format!("{}", x as u32));
|
unicode = String::from(format!("{}", x as u32));
|
||||||
unicode_hex = String::from(format!("{:x}", x as u32));
|
unicode_hex = String::from(format!("{:x}", x as u32));
|
||||||
}
|
}
|
||||||
None => {
|
None => {}
|
||||||
}
|
|
||||||
};
|
};
|
||||||
let byte = format!("{}", char_detail.byte_index);
|
let byte = format!("{}", char_detail.byte_index);
|
||||||
let hex = format!("{:02x}", char_detail.byte);
|
let hex = format!("{:02x}", char_detail.byte);
|
||||||
@ -111,7 +113,7 @@ impl StringTableRow {
|
|||||||
byte,
|
byte,
|
||||||
hex,
|
hex,
|
||||||
dec,
|
dec,
|
||||||
bin
|
bin,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -141,8 +143,10 @@ impl StringTableRow {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl StringTable {
|
impl StringTable {
|
||||||
fn from (string_details: &StringDetail) -> Self {
|
fn from(string_details: &StringDetail) -> Self {
|
||||||
let characters = string_details.characters.iter()
|
let characters = string_details
|
||||||
|
.characters
|
||||||
|
.iter()
|
||||||
.map(StringTableRow::from)
|
.map(StringTableRow::from)
|
||||||
.collect::<Vec<StringTableRow>>();
|
.collect::<Vec<StringTableRow>>();
|
||||||
|
|
||||||
@ -159,7 +163,8 @@ impl StringTable {
|
|||||||
table_builder.push_record(i.to_table_row());
|
table_builder.push_record(i.to_table_row());
|
||||||
}
|
}
|
||||||
|
|
||||||
let table = table_builder.build()
|
let table = table_builder
|
||||||
|
.build()
|
||||||
.with(Style::sharp())
|
.with(Style::sharp())
|
||||||
.with(Modify::new(Rows::new(1..)).with(Alignment::left()))
|
.with(Modify::new(Rows::new(1..)).with(Alignment::left()))
|
||||||
.to_string();
|
.to_string();
|
||||||
@ -198,12 +203,11 @@ struct InspectArgs {
|
|||||||
utf16: bool,
|
utf16: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let cli = CliArgs::parse();
|
let cli = CliArgs::parse();
|
||||||
let details = match cli.inspect.utf8 {
|
let details = match cli.inspect.utf8 {
|
||||||
true => StringDetail::parse_utf8(&cli.name),
|
true => StringDetail::parse_utf8(&cli.name),
|
||||||
false => StringDetail::parse_utf16(&cli.name)
|
false => StringDetail::parse_utf16(&cli.name),
|
||||||
};
|
};
|
||||||
|
|
||||||
let char_table = StringTable::from(&details);
|
let char_table = StringTable::from(&details);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user