From b99d16962fc695ecd2225b6d7580721d2910a92e Mon Sep 17 00:00:00 2001
From: cool-mist
Date: Thu, 19 Dec 2024 17:13:14 +0530
Subject: [PATCH] Initial

---
 .gitignore     |   1 +
 Cargo.lock     |  96 +++++++++++++++++
 Cargo.toml     |   7 ++
 README.md      |  22 ++++
 example.txt    |  13 +++
 src/filters.rs |  38 +++++++
 src/main.rs    |  35 ++++++
 src/stats.rs   | 285 +++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 497 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 README.md
 create mode 100644 example.txt
 create mode 100644 src/filters.rs
 create mode 100644 src/main.rs
 create mode 100644 src/stats.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..f707787
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,96 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "argh"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7af5ba06967ff7214ce4c7419c7d185be7ecd6cc4965a8f6e1d8ce0398aad219"
+dependencies = [
+ "argh_derive",
+ "argh_shared",
+]
+
+[[package]]
+name = "argh_derive"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56df0aeedf6b7a2fc67d06db35b09684c3e8da0c95f8f27685cb17e08413d87a"
+dependencies = [
+ "argh_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "argh_shared"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "lf_fixer"
+version = "0.1.0"
+dependencies = [
+ "argh",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.92"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.216"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.216"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.90"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ba9b5f4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "lf_fixer"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+argh = "0.1.12"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d48f63a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# LF Fixer
+
+View the LF stats of files in your repository.
+
+## Installation
+
+```bash
+cargo install --git https://github.com/cool-mist/lf_fixer
+```
+
+## Usage
+
+```bash
+lf_fixer --help
+```
+
+## TODO
+
+- Edit files to fix line ending mismatches
+  - To user defined line ending,
+  - To the line ending of the majority of the files
+- Add tests
diff --git a/example.txt b/example.txt
new file mode 100644
index 0000000..83512d6
--- /dev/null
+++ b/example.txt
@@ -0,0 +1,13 @@
+alskjd
+askldja'sdj
+askldja'sdj
+asa'sdj aska'sdj
+laksjkajs'sdj
+asjdha
+djasdashjkda'skdas
+jashdasd
+laksjkajs'sdj
+ajsda
+sdashdasjhd
+laksjkajs'sdj
+ajshjhad
diff --git a/src/filters.rs b/src/filters.rs
new file mode 100644
index 0000000..dc6d783
--- /dev/null
+++ b/src/filters.rs
@@ -0,0 +1,38 @@
+pub(crate) enum FileFilter {
+    None,
+    Extension(ExtensionFilter),
+}
+
+impl FileFilter {
+    pub(crate) fn apply(&self, file_name: &str) -> bool {
+        match self {
+            FileFilter::None => true,
+            FileFilter::Extension(filter) => filter.apply(file_name),
+        }
+    }
+
+    pub(crate) fn extension(ext: &str) -> FileFilter {
+        FileFilter::Extension(ExtensionFilter::new(ext))
+    }
+}
+
+pub(crate) struct ExtensionFilter {
+    ext: String,
+}
+
+impl ExtensionFilter {
+    fn new(ext: &str) -> ExtensionFilter {
+        // Accept ".txt" as well as "txt": `apply` compares the text after the last '.'.
+        let ext = ext.trim_start_matches('.').to_string();
+        ExtensionFilter { ext }
+    }
+
+    fn apply(&self, file_name: &str) -> bool {
+        let ext = file_name.split('.').last();
+        if let Some(ext) = ext {
+            return ext == self.ext;
+        }
+
+        false
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..f715f9a
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,35 @@
+use argh::FromArgs;
+use filters::FileFilter;
+use stats::{FileNames, FileStats, FileStatsAggregate};
+mod filters;
+mod stats;
+
+fn main() {
+    let args: Args = argh::from_env();
+
+    let filter = match args.ext {
+        Some(ext) => FileFilter::extension(&ext),
+        None => FileFilter::None,
+    };
+
+    let stats = FileNames::generate(args.dir, filter, args.recursive)
+        .filter_map(FileStats::generate)
+        .fold(FileStatsAggregate::new(), FileStatsAggregate::fold);
+    stats.print_table();
+}
+
+/// Line endings fixer tool
+#[derive(FromArgs)]
+struct Args {
+    /// directory to search for files in
+    #[argh(positional)]
+    dir: Option<String>,
+
+    /// extension to filter files by : Ex: -e .txt
+    #[argh(option, short = 'e')]
+    ext: Option<String>,
+
+    /// recursively traverse directories
+    #[argh(switch, short = 'r')]
+    recursive: bool,
+}
diff --git a/src/stats.rs b/src/stats.rs
new file mode 100644
index 0000000..434ca45
--- /dev/null
+++ b/src/stats.rs
@@ -0,0 +1,285 @@
+use std::fmt::Display;
+use std::fs::{self, File, ReadDir};
+use std::io::Read;
+
+use crate::filters::FileFilter;
+
+pub(crate) struct FileStatsAggregate {
+    stats: Vec<FileStats>,
+    crlf: usize,
+    cr: usize,
+    lf: usize,
+    max: Option<LineSep>,
+    lines: usize,
+}
+
+impl FileStatsAggregate {
+    pub(crate) fn new() -> Self {
+        FileStatsAggregate {
+            stats: Vec::new(),
+            crlf: 0,
+            cr: 0,
+            lf: 0,
+            max: None,
+            lines: 0,
+        }
+    }
+    pub(crate) fn fold(mut accumulator: FileStatsAggregate, stat: FileStats) -> FileStatsAggregate {
+        accumulator.crlf += stat.crlf;
+        accumulator.cr += stat.cr;
+        accumulator.lf += stat.lf;
+        accumulator.lines += stat.lines;
+
+        if (accumulator.crlf > accumulator.lf) && (accumulator.crlf > accumulator.cr) {
+            accumulator.max = Some(LineSep::CrLf);
+        } else if (accumulator.lf > accumulator.cr) && (accumulator.lf > accumulator.crlf) {
+            accumulator.max = Some(LineSep::Lf);
+        } else if (accumulator.cr > accumulator.crlf) && (accumulator.cr > accumulator.lf) {
+            accumulator.max = Some(LineSep::Cr);
+        } else {
+            accumulator.max = None;
+        }
+
+        accumulator.stats.push(stat);
+
+        accumulator
+    }
+
+    pub(crate) fn print_table(&self) {
+        println!(
+            "{:<4} | {:<4} | {:<4} | {:<4} | {:<4} | {}",
+            "#", "CRLF", "CR", "LF", "Max", "File"
+        );
+
+        println!("=============================================");
+
+        for stat in &self.stats {
+            let max = match &stat.max {
+                Some(max) => max.to_string(),
+                None => "".to_string(),
+            };
+
+            println!(
+                "{:<4} | {:<4} | {:<4} | {:<4} | {:<4} | {}",
+                stat.lines, stat.crlf, stat.cr, stat.lf, max, stat.name
+            );
+        }
+
+        println!("---------------------------------------------");
+        let max = match &self.max {
+            Some(max) => max.to_string(),
+            None => "".to_string(),
+        };
+        println!(
+            "{:<4} | {:<4} | {:<4} | {:<4} | {:<4} | -",
+            self.lines, self.crlf, self.cr, self.lf, max,
+        );
+    }
+}
+
+pub(crate) struct FileStats {
+    name: String,
+    lines: usize,
+    crlf: usize,
+    cr: usize,
+    lf: usize,
+    max: Option<LineSep>,
+}
+
+impl FileStats {
+    fn new(name: &str) -> FileStats {
+        FileStats {
+            name: name.to_string(),
+            lines: 0,
+            crlf: 0,
+            cr: 0,
+            lf: 0,
+            max: None,
+        }
+    }
+
+    pub(crate) fn generate(file_name: String) -> Option<FileStats> {
+        let file = File::open(&file_name);
+        if file.is_err() {
+            println!("Could not open file: {}", file_name);
+            return None;
+        }
+        let mut file = file.unwrap();
+
+        let mut buf = Vec::new();
+
+        let bytes_read = file.read_to_end(&mut buf);
+        if bytes_read.is_err() {
+            println!("Could not read file: {}", file_name);
+            return None;
+        }
+
+        let bytes_read = bytes_read.unwrap();
+
+        let mut stats = FileStats::new(&file_name);
+        let mut i = 0;
+        loop {
+            if i == bytes_read {
+                break Some(stats);
+            }
+
+            let byte = buf[i];
+            if byte == b'\r' {
+                if i + 1 < bytes_read && buf[i + 1] == b'\n' {
+                    stats.update(LineSep::CrLf);
+                    i += 1;
+                } else {
+                    stats.update(LineSep::Cr);
+                }
+            } else if byte == b'\n' {
+                stats.update(LineSep::Lf);
+            }
+
+            i += 1;
+        }
+    }
+
+    /// Records one line terminator and refreshes the dominant separator.
+    fn update(&mut self, line: LineSep) {
+        self.lines += 1;
+        match line {
+            LineSep::CrLf => self.crlf += 1,
+            LineSep::Cr => self.cr += 1,
+            LineSep::Lf => self.lf += 1,
+        }
+
+        // Only a strict winner counts; ties yield `None`, matching
+        // `FileStatsAggregate::fold`. A catch-all `else` would report CR
+        // even for files without a single bare CR (e.g. 1 CRLF + 1 LF).
+        self.max = if (self.crlf > self.lf) && (self.crlf > self.cr) {
+            Some(LineSep::CrLf)
+        } else if (self.lf > self.cr) && (self.lf > self.crlf) {
+            Some(LineSep::Lf)
+        } else if (self.cr > self.crlf) && (self.cr > self.lf) {
+            Some(LineSep::Cr)
+        } else {
+            None
+        };
+    }
+}
+
+#[derive(Clone)]
+enum LineSep {
+    Lf,
+    CrLf,
+    Cr,
+}
+
+impl Display for LineSep {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LineSep::Lf => write!(f, "LF"),
+            LineSep::CrLf => write!(f, "CRLF"),
+            LineSep::Cr => write!(f, "CR"),
+        }
+    }
+}
+
+pub(crate) struct FileNames {
+    cur_dir: Option<String>,
+    filter: FileFilter,
+    all_dirs: Vec<String>,
+    cur_dir_files: Vec<String>,
+    recursive: bool,
+}
+
+impl FileNames {
+    pub(crate) fn generate(dir: Option<String>, pattern: FileFilter, recursive: bool) -> FileNames {
+        let cur_dir = match dir {
+            Some(dir) => dir,
+            None => "./".to_string(),
+        };
+
+        FileNames {
+            cur_dir: Some(cur_dir),
+            filter: pattern,
+            all_dirs: Vec::new(),
+            cur_dir_files: Vec::new(),
+            recursive,
+        }
+    }
+
+    fn get_next_dir(&mut self) -> Option<ReadDir> {
+        loop {
+            if let Some(dir) = self.all_dirs.pop() {
+                let entries = fs::read_dir(&dir);
+                if entries.is_err() {
+                    println!("Could not read dir: {}", dir);
+                    continue;
+                }
+
+                return Some(entries.unwrap());
+            }
+
+            return None;
+        }
+    }
+
+    fn populate_files(&mut self) -> bool {
+        if !self.cur_dir_files.is_empty() {
+            return true;
+        }
+
+        let next_dir = self.get_next_dir();
+        if next_dir.is_none() {
+            return false;
+        }
+
+        let next_dir = next_dir.unwrap();
+        for entry in next_dir {
+            // An `Err` entry has no path to show; print the error itself
+            // instead of unwrapping it (which panicked).
+            let entry = match entry {
+                Ok(entry) => entry,
+                Err(err) => {
+                    println!("Could not read directory entry ({}), skipping", err);
+                    continue;
+                }
+            };
+            let path = entry.path();
+            if path.is_dir() {
+                if self.recursive {
+                    self.all_dirs.push(path.to_string_lossy().into_owned());
+                }
+            } else if path.is_file() {
+                if self.filter.apply(&path.to_string_lossy()) {
+                    self.cur_dir_files.push(path.to_string_lossy().into_owned());
+                }
+            } else {
+                println!("Skipping {}, unknown file type", path.display());
+            }
+        }
+
+        true
+    }
+}
+
+impl Iterator for FileNames {
+    type Item = String;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(dir) = self.cur_dir.take() {
+            self.all_dirs.push(dir);
+        }
+
+        if let Some(file) = self.cur_dir_files.pop() {
+            return Some(file);
+        }
+
+        loop {
+            let populated = self.populate_files();
+            if !populated {
+                return None;
+            }
+
+            if !self.cur_dir_files.is_empty() {
+                return Some(self.cur_dir_files.pop().unwrap());
+            }
+        }
+    }
+}