diff --git a/README.md b/README.md index d48f63a..d634b7d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # LF Fixer -View the LF stats of files in your repository. +- View the line ending stats. +- Normalize line endings. ## Installation @@ -16,7 +17,7 @@ lf_fixer --help ## TODO -- Edit files to fix line ending mismatches - - To user defined line ending, - - To the line ending of the majority of the files -- Add tests +- [x] Edit files to fix line ending mismatches + - [x] To user defined line ending, + - [x] To the line ending of the majority of the files +- [] Add tests diff --git a/src/fixer.rs b/src/fixer.rs index 13774aa..14564a2 100644 --- a/src/fixer.rs +++ b/src/fixer.rs @@ -22,16 +22,16 @@ impl Fixer { } pub(crate) fn fix(&mut self) { - let file = File::options().read(true).write(true).open(&self.file_name); + let file = File::options().read(true).open(&self.file_name); if file.is_err() { println!("Could not open file: {}", self.file_name); return; } let mut file = file.unwrap(); - let mut buf = Vec::new(); - let mut write_buf = Vec::new(); - let bytes_read = file.read_to_end(&mut buf); + let mut read_buf = Vec::with_capacity(1024 * 1024); + let mut write_buf = Vec::with_capacity(1024 * 1024); + let bytes_read = file.read_to_end(&mut read_buf); if bytes_read.is_err() { println!("Could not read file: {}", self.file_name); @@ -39,44 +39,66 @@ impl Fixer { } let bytes_read = bytes_read.unwrap(); - let mut next_read_head = 0; + let mut read_head: usize = 0; + let mut write_head: usize = 0; loop { - if next_read_head == bytes_read { + if read_head == bytes_read { break; } - next_read_head = match buf[next_read_head] { + let byte = read_buf[read_head]; + read_head = match byte { b'\r' => { + self.normalize_ending(&mut write_buf, &mut write_head); + let lf_index = read_head + 1; // LL(1) to see if the next byte is '\n' - if next_read_head < bytes_read && buf[next_read_head] == b'\n' { - write_buf.extend(&self.to); - next_read_head + 2 + if lf_index < bytes_read && read_buf[lf_index] == b'\n' { + lf_index + 1 } else { - next_read_head + 1 + read_head + 1 } } b'\n' => { - write_buf.extend(&self.to); - next_read_head + 1 + self.normalize_ending(&mut write_buf, &mut write_head); + read_head + 1 } - any_other_byte => { - write_buf.push(any_other_byte); - next_read_head + 1 + _ => { + write_buf.push(byte); + write_head = write_head + 1; + read_head + 1 } } } - let seeked = file.seek(SeekFrom::Start(0)); - if seeked.is_err() { - println!("Could not seek to start of file: {}", self.file_name); + drop(file); + + let file = File::options() + .write(true) + .truncate(true) + .open(&self.file_name); + if file.is_err() { + println!("Could not open file: {}", self.file_name); + return; + } + let mut file = file.unwrap(); + let written = file.write_all(&write_buf[0..write_head]); + if written.is_err() { + println!( + "Could not write to file: {} {:?}", + self.file_name, + written.err() + ); return; } - let written = file.write_all(&write_buf); - if written.is_err() { - println!("Could not write to file: {}", self.file_name); - println!("Error: {:?}", written.err()); + println!("Fixed line endings in file: {}", self.file_name); + } + + fn normalize_ending(&self, write_buf: &mut Vec, write_head: &mut usize) { + for &b in &self.to { + write_buf.push(b); + *write_head += 1; } } } diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..24d629a --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,146 @@ +use std::fs::{self, ReadDir}; + +use crate::{filters::FileFilter, Args}; + +pub(crate) struct SingleFileName { + file_name: Option, +} + +pub(crate) struct MultipleFileNames { + cur_dir: Option, + filter: FileFilter, + all_dirs: Vec, + cur_dir_files: Vec, + recursive: bool, +} + +pub(crate) enum FileNames { + Single(SingleFileName), + Multiple(MultipleFileNames), +} + +impl SingleFileName { + fn new(file_name: String) -> SingleFileName { + SingleFileName { + file_name: Some(file_name), + } + } +} + +impl MultipleFileNames { + fn get_next_dir(&mut self) -> Option { + loop { + if let Some(dir) = self.all_dirs.pop() { + let entries = fs::read_dir(&dir); + if entries.is_err() { + println!("Could not read dir: {}", dir); + continue; + } + + return Some(entries.unwrap()); + } + + return None; + } + } + + fn populate_files(&mut self) -> bool { + if self.cur_dir_files.len() > 0 { + return true; + } + + let next_dir = self.get_next_dir(); + if next_dir.is_none() { + return false; + } + + let next_dir = next_dir.unwrap(); + for entry in next_dir { + if entry.is_err() { + println!( + "Could not read file {}, skipping", + entry.unwrap().path().display() + ); + continue; + } + + let entry = entry.unwrap(); + let path = entry.path(); + if path.is_dir() { + if self.recursive { + self.all_dirs.push(path.to_str().unwrap().to_string()); + } + } else if path.is_file() { + if self.filter.apply(path.to_str().unwrap()) { + self.cur_dir_files.push(path.to_str().unwrap().to_string()); + } + } else { + println!("Skipping {}, unknown file type", path.display()); + } + } + + true + } +} + +impl Iterator for MultipleFileNames { + type Item = String; + + fn next(&mut self) -> Option { + if let Some(dir) = self.cur_dir.take() { + self.all_dirs.push(dir); + } + + if let Some(file) = self.cur_dir_files.pop() { + return Some(file); + } + + loop { + let populated = self.populate_files(); + if !populated { + return None; + } + + if self.cur_dir_files.len() > 0 { + return Some(self.cur_dir_files.pop().unwrap()); + } + } + } +} + +impl FileNames { + pub(crate) fn new(args: &Args) -> FileNames { + if let Some(file_name) = &args.file_name { + return FileNames::Single(SingleFileName::new(file_name.to_string())); + } + + let cur_dir = match &args.dir { + Some(dir) => dir.to_string(), + None => "./".to_string(), + }; + + let filter = match &args.ext { + Some(ext) => FileFilter::extension(&ext), + None => FileFilter::None, + }; + + FileNames::Multiple(MultipleFileNames { + cur_dir: Some(cur_dir), + filter, + all_dirs: Vec::new(), + cur_dir_files: Vec::new(), + recursive: args.recursive, + }) + } +} + +impl Iterator for FileNames { + type Item = String; + + fn next(&mut self) -> Option { + match self { + FileNames::Single(file_name) => file_name.file_name.take(), + FileNames::Multiple(file_names) => file_names.next(), + } + } +} diff --git a/src/main.rs b/src/main.rs index 375a111..72d1e98 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,54 +1,23 @@ use argh::FromArgs; -use filters::FileFilter; use fixer::Fixer; -use stats::{FileNames, FileStats, FileStatsAggregate, LineSep}; +use iter::FileNames; +use stats::{FileStats, FileStatsAggregate, LineSep}; mod filters; mod fixer; +mod iter; mod stats; -fn main() { - let args: Args = argh::from_env(); - - let filter = match args.ext { - Some(ext) => FileFilter::extension(&ext), - None => FileFilter::None, - }; - - let stats = FileNames::generate(args.dir.clone(), filter.clone(), args.recursive) - .filter_map(FileStats::generate) - .fold(FileStatsAggregate::new(), FileStatsAggregate::fold); - stats.print_table(); - - if args.fix { - let target = match args.target { - Some(target) => { - println!("Fixing line endings to provided line ending - {:}", target); - target - } - None => match stats.max() { - Some(max) => { - println!("Fixing line endings to most common line ending - {:}", max); - max - } - None => { - panic!("No line endings found to fix"); - } - }, - }; - - FileNames::generate(args.dir, filter, args.recursive) - .map(|file_name| Fixer::new(file_name, target.clone())) - .for_each(|mut fixer| fixer.fix()); - } -} - /// Line endings fixer tool #[derive(FromArgs)] struct Args { /// directory to search for files in - #[argh(positional)] + #[argh(option, short = 'd')] dir: Option, + /// specific file to evaluate. All other options are ignored + #[argh(positional)] + file_name: Option, + /// extension to filter files by : Ex: -e .txt #[argh(option, short = 'e')] ext: Option, @@ -57,20 +26,60 @@ struct Args { #[argh(switch, short = 'r')] recursive: bool, - /// fix line endings - #[argh(switch, short = 'f')] - fix: bool, - - /// target line ending, applicable only with -f - #[argh(option, short = 't', from_str_fn(parse_line_sep))] - target: Option, + /// normalize line endings + #[argh(option, short = 'n', from_str_fn(parse_norm_option))] + normalize: Option, } -fn parse_line_sep(s: &str) -> Result { +enum NormalizeOption { + /// normalize to the most frequent line ending + /// across all files matching the filter. + MostFrequent, + + /// normalize to lf + Lf, + + /// normalize to crlf + CrLf, + + /// normalize to cr + Cr, +} + +fn main() { + let args: Args = argh::from_env(); + + let stats = FileNames::new(&args) + .filter_map(FileStats::generate) + .fold(FileStatsAggregate::new(), FileStatsAggregate::fold); + stats.print_table(); + + if let Some(normalize_option) = &args.normalize { + let target = match normalize_option { + NormalizeOption::Lf => LineSep::Lf, + NormalizeOption::CrLf => LineSep::CrLf, + NormalizeOption::Cr => LineSep::Cr, + NormalizeOption::MostFrequent => match stats.max() { + Some(max) => max, + None => { + println!("Target line ending could not be determined. Skipping normalization"); + return; + } + }, + }; + + FileNames::new(&args) + .map(|file_name| Fixer::new(file_name, target.clone())) + .for_each(|mut fixer| fixer.fix()); + } +} + +fn parse_norm_option(s: &str) -> Result { match s { - "lf" => Ok(LineSep::Lf), - "crlf" => Ok(LineSep::CrLf), - "cr" => Ok(LineSep::Cr), + "lf" => Ok(NormalizeOption::Lf), + "crlf" => Ok(NormalizeOption::CrLf), + "cr" => Ok(NormalizeOption::Cr), + "any" => Ok(NormalizeOption::MostFrequent), _ => Err("Invalid line ending".to_string()), } } diff --git a/src/stats.rs b/src/stats.rs index 8c61d62..8ea55a0 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -1,8 +1,15 @@ use std::fmt::Display; -use std::fs::{self, File, ReadDir}; +use std::fs::File; use std::io::Read; -use crate::filters::FileFilter; +pub(crate) struct FileStats { + name: String, + lines: usize, + crlf: usize, + cr: usize, + lf: usize, + max: Option, +} pub(crate) struct FileStatsAggregate { stats: Vec, @@ -13,6 +20,91 @@ pub(crate) struct FileStatsAggregate { lines: usize, } +#[derive(Clone)] +pub(crate) enum LineSep { + Lf, + CrLf, + Cr, +} + +impl FileStats { + fn new(name: &str) -> FileStats { + FileStats { + name: name.to_string(), + lines: 0, + crlf: 0, + cr: 0, + lf: 0, + max: None, + } + } + + pub(crate) fn generate(file_name: String) -> Option { + let file = File::open(&file_name); + if file.is_err() { + println!("Could not open file: {}", file_name); + return None; + } + let mut file = file.unwrap(); + let mut buf = Vec::with_capacity(1024 * 1024); + + let bytes_read = file.read_to_end(&mut buf); + if bytes_read.is_err() { + println!("Could not read file: {}", file_name); + return None; + } + + let bytes_read = bytes_read.unwrap(); + + let mut stats = FileStats::new(&file_name); + let mut i = 0; + loop { + if i == bytes_read { + break Some(stats); + } + + let byte = buf[i]; + if byte == b'\r' { + if i + 1 < bytes_read && buf[i + 1] == b'\n' { + stats.update(LineSep::CrLf); + i = i + 1; + } else { + stats.update(LineSep::Cr); + } + } else if byte == b'\n' { + stats.update(LineSep::Lf); + } + + i = i + 1; + } + } + + fn update(&mut self, line: LineSep) { + self.lines += 1; + match line { + LineSep::CrLf => { + self.crlf += 1; + } + LineSep::Cr => { + self.cr += 1; + } + LineSep::Lf => { + self.lf += 1; + } + } + + if (self.crlf > self.lf) && (self.crlf > self.cr) { + self.max = Some(LineSep::CrLf); + } else if (self.lf > self.cr) && (self.lf > self.crlf) { + self.max = Some(LineSep::Lf); + } else if (self.cr > self.lf) && (self.cr > self.crlf) { + self.max = Some(LineSep::Cr); + } else { + self.max = None; + } + } +} + impl FileStatsAggregate { pub(crate) fn new() -> Self { FileStatsAggregate { @@ -82,99 +174,6 @@ impl FileStatsAggregate { } } -pub(crate) struct FileStats { - name: String, - lines: usize, - crlf: usize, - cr: usize, - lf: usize, - max: Option, -} - -impl FileStats { - fn new(name: &str) -> FileStats { - FileStats { - name: name.to_string(), - lines: 0, - crlf: 0, - cr: 0, - lf: 0, - max: None, - } - } - - pub(crate) fn generate(file_name: String) -> Option { - let file = File::open(&file_name); - if file.is_err() { - println!("Could not open file: {}", file_name); - return None; - } - let mut file = file.unwrap(); - - let mut buf = Vec::new(); - - let bytes_read = file.read_to_end(&mut buf); - if bytes_read.is_err() { - println!("Could not read file: {}", file_name); - return None; - } - - let bytes_read = bytes_read.unwrap(); - - let mut stats = FileStats::new(&file_name); - let mut i = 0; - loop { - if i == bytes_read { - break Some(stats); - } - - let byte = buf[i]; - if byte == b'\r' { - if i + 1 < bytes_read && buf[i + 1] == b'\n' { - stats.update(LineSep::CrLf); - i = i + 1; - } else { - stats.update(LineSep::Cr); - } - } else if byte == b'\n' { - stats.update(LineSep::Lf); - } - - i = i + 1; - } - } - - fn update(&mut self, line: LineSep) { - self.lines += 1; - match line { - LineSep::CrLf => { - self.crlf += 1; - } - LineSep::Cr => { - self.cr += 1; - } - LineSep::Lf => { - self.lf += 1; - } - } - - if (self.crlf > self.lf) && (self.crlf > self.cr) { - self.max = Some(LineSep::CrLf); - } else if (self.lf > self.cr) && (self.lf > self.crlf) { - self.max = Some(LineSep::Lf); - } else { - self.max = Some(LineSep::Cr); - } - } -} - -#[derive(Clone)] -pub(crate) enum LineSep { - Lf, - CrLf, - Cr, -} - impl Display for LineSep { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -184,107 +183,3 @@ impl Display for LineSep { } } } - -pub(crate) struct FileNames { - cur_dir: Option, - filter: FileFilter, - all_dirs: Vec, - cur_dir_files: Vec, - recursive: bool, -} - -impl FileNames { - pub(crate) fn generate(dir: Option, pattern: FileFilter, recursive: bool) -> FileNames { - let cur_dir = match dir { - Some(dir) => dir, - None => "./".to_string(), - }; - - FileNames { - cur_dir: Some(cur_dir), - filter: pattern, - all_dirs: Vec::new(), - cur_dir_files: Vec::new(), - recursive, - } - } - - fn get_next_dir(&mut self) -> Option { - loop { - if let Some(dir) = self.all_dirs.pop() { - let entries = fs::read_dir(&dir); - if entries.is_err() { - println!("Could not read dir: {}", dir); - continue; - } - - return Some(entries.unwrap()); - } - - return None; - } - } - - fn populate_files(&mut self) -> bool { - if self.cur_dir_files.len() > 0 { - return true; - } - - let next_dir = self.get_next_dir(); - if next_dir.is_none() { - return false; - } - - let next_dir = next_dir.unwrap(); - for entry in next_dir { - if entry.is_err() { - println!( - "Could not read file {}, skipping", - entry.unwrap().path().display() - ); - continue; - } - - let entry = entry.unwrap(); - let path = entry.path(); - if path.is_dir() { - if self.recursive { - self.all_dirs.push(path.to_str().unwrap().to_string()); - } - } else if path.is_file() { - if self.filter.apply(path.to_str().unwrap()) { - self.cur_dir_files.push(path.to_str().unwrap().to_string()); - } - } else { - println!("Skipping {}, unknown file type", path.display()); - } - } - - true - } -} - -impl Iterator for FileNames { - type Item = String; - - fn next(&mut self) -> Option { - if let Some(dir) = self.cur_dir.take() { - self.all_dirs.push(dir); - } - - if let Some(file) = self.cur_dir_files.pop() { - return Some(file); - } - - loop { - let populated = self.populate_files(); - if !populated { - return None; - } - - if self.cur_dir_files.len() > 0 { - return Some(self.cur_dir_files.pop().unwrap()); - } - } - } -}