Add fixer

This commit is contained in:
cool-mist 2024-12-26 00:01:36 +05:30
parent 31fa74a342
commit 8dbe165185
5 changed files with 350 additions and 277 deletions

View File

@ -1,6 +1,7 @@
# LF Fixer
View the LF stats of files in your repository.
- View the line ending stats.
- Normalize line endings.
## Installation
@ -16,7 +17,7 @@ lf_fixer --help
## TODO
- Edit files to fix line ending mismatches
- To user defined line ending,
- To the line ending of the majority of the files
- Add tests
- [x] Edit files to fix line ending mismatches
- [x] To a user-defined line ending
- [x] To the line ending of the majority of the files
- [ ] Add tests

View File

@ -22,16 +22,16 @@ impl Fixer {
}
pub(crate) fn fix(&mut self) {
let file = File::options().read(true).write(true).open(&self.file_name);
let file = File::options().read(true).open(&self.file_name);
if file.is_err() {
println!("Could not open file: {}", self.file_name);
return;
}
let mut file = file.unwrap();
let mut buf = Vec::new();
let mut write_buf = Vec::new();
let bytes_read = file.read_to_end(&mut buf);
let mut read_buf = Vec::with_capacity(1024 * 1024);
let mut write_buf = Vec::with_capacity(1024 * 1024);
let bytes_read = file.read_to_end(&mut read_buf);
if bytes_read.is_err() {
println!("Could not read file: {}", self.file_name);
@ -39,44 +39,66 @@ impl Fixer {
}
let bytes_read = bytes_read.unwrap();
let mut next_read_head = 0;
let mut read_head: usize = 0;
let mut write_head: usize = 0;
loop {
if next_read_head == bytes_read {
if read_head == bytes_read {
break;
}
next_read_head = match buf[next_read_head] {
let byte = read_buf[read_head];
read_head = match byte {
b'\r' => {
self.normalize_ending(&mut write_buf, &mut write_head);
let lf_index = read_head + 1;
// LL(1) to see if the next byte is '\n'
if next_read_head < bytes_read && buf[next_read_head] == b'\n' {
write_buf.extend(&self.to);
next_read_head + 2
if lf_index < bytes_read && read_buf[lf_index] == b'\n' {
lf_index + 1
} else {
next_read_head + 1
read_head + 1
}
}
b'\n' => {
write_buf.extend(&self.to);
next_read_head + 1
self.normalize_ending(&mut write_buf, &mut write_head);
read_head + 1
}
any_other_byte => {
write_buf.push(any_other_byte);
next_read_head + 1
_ => {
write_buf.push(byte);
write_head = write_head + 1;
read_head + 1
}
}
}
let seeked = file.seek(SeekFrom::Start(0));
if seeked.is_err() {
println!("Could not seek to start of file: {}", self.file_name);
drop(file);
let file = File::options()
.write(true)
.truncate(true)
.open(&self.file_name);
if file.is_err() {
println!("Could not open file: {}", self.file_name);
return;
}
let mut file = file.unwrap();
let written = file.write_all(&write_buf[0..write_head]);
if written.is_err() {
println!(
"Could not write to file: {} {:?}",
self.file_name,
written.err()
);
return;
}
let written = file.write_all(&write_buf);
if written.is_err() {
println!("Could not write to file: {}", self.file_name);
println!("Error: {:?}", written.err());
println!("Fixed line endings in file: {}", self.file_name);
}
/// Appends the bytes of the target line ending (`self.to`) to `write_buf`
/// and advances `write_head` by the number of bytes appended.
fn normalize_ending(&self, write_buf: &mut Vec<u8>, write_head: &mut usize) {
    let len_before = write_buf.len();
    write_buf.extend(&self.to);
    // One increment per appended byte, exactly as a byte-by-byte push would do.
    *write_head += write_buf.len() - len_before;
}
}

146
src/iter.rs Normal file
View File

@ -0,0 +1,146 @@
use std::fs::{self, ReadDir};
use crate::{filters::FileFilter, Args};
/// Iterator state for the single-file mode.
pub(crate) struct SingleFileName {
/// The file name to yield; `FileNames::next` takes it, leaving `None` behind.
file_name: Option<String>,
}
/// Walks directories and yields the paths of files accepted by `filter`.
pub(crate) struct MultipleFileNames {
/// Starting directory; moved onto `all_dirs` by the first call to `next`.
cur_dir: Option<String>,
/// Applied to each regular file's path to decide whether it is yielded.
filter: FileFilter,
/// Directories still to be read; used as a stack (push/pop).
all_dirs: Vec<String>,
/// Accepted files from the most recently read directory, not yet yielded.
cur_dir_files: Vec<String>,
/// When true, subdirectories found while reading are pushed onto `all_dirs`.
recursive: bool,
}
/// Source of file names: either one explicit file or a directory walk.
pub(crate) enum FileNames {
/// Yields the single wrapped file name once.
Single(SingleFileName),
/// Delegates to the directory-walking iterator.
Multiple(MultipleFileNames),
}
impl SingleFileName {
fn new(file_name: String) -> SingleFileName {
SingleFileName {
file_name: Some(file_name),
}
}
}
impl MultipleFileNames {
    /// Pops pending directories until one can be opened.
    ///
    /// Directories that cannot be read are reported on stdout and skipped.
    /// Returns `None` once `all_dirs` is exhausted.
    fn get_next_dir(&mut self) -> Option<ReadDir> {
        while let Some(dir) = self.all_dirs.pop() {
            match fs::read_dir(&dir) {
                Ok(entries) => return Some(entries),
                Err(_) => println!("Could not read dir: {}", dir),
            }
        }
        None
    }

    /// Returns `path` as an owned UTF-8 string.
    ///
    /// Paths that are not valid UTF-8 are reported on stdout and yield `None`
    /// instead of panicking.
    fn utf8_name(path: &std::path::Path) -> Option<String> {
        let name = path.to_str().map(str::to_string);
        if name.is_none() {
            println!("Skipping {}, path is not valid UTF-8", path.display());
        }
        name
    }

    /// Ensures `cur_dir_files` has been refilled from the next readable directory.
    ///
    /// Returns `true` if files are already pending or a directory was read
    /// (even if it contributed no files), and `false` when no directories remain.
    fn populate_files(&mut self) -> bool {
        if !self.cur_dir_files.is_empty() {
            return true;
        }

        let next_dir = match self.get_next_dir() {
            Some(next_dir) => next_dir,
            None => return false,
        };

        for entry in next_dir {
            let entry = match entry {
                Ok(entry) => entry,
                Err(err) => {
                    // A failed entry carries no path, so report the error itself;
                    // unwrapping it here would panic.
                    println!("Could not read directory entry ({}), skipping", err);
                    continue;
                }
            };

            let path = entry.path();
            if path.is_dir() {
                if self.recursive {
                    if let Some(name) = Self::utf8_name(&path) {
                        self.all_dirs.push(name);
                    }
                }
            } else if path.is_file() {
                if let Some(name) = Self::utf8_name(&path) {
                    if self.filter.apply(name.as_str()) {
                        self.cur_dir_files.push(name);
                    }
                }
            } else {
                println!("Skipping {}, unknown file type", path.display());
            }
        }
        true
    }
}
impl Iterator for MultipleFileNames {
    type Item = String;

    /// Yields the next accepted file path, reading further directories on demand.
    fn next(&mut self) -> Option<Self::Item> {
        // On the first call the starting directory is still parked in `cur_dir`.
        if let Some(root) = self.cur_dir.take() {
            self.all_dirs.push(root);
        }

        loop {
            if let Some(name) = self.cur_dir_files.pop() {
                return Some(name);
            }
            // Nothing pending: read another directory, or stop when none remain.
            if !self.populate_files() {
                return None;
            }
        }
    }
}
impl FileNames {
    /// Builds the file-name source described by the command-line arguments.
    ///
    /// An explicit `file_name` wins over every other option. Otherwise a
    /// directory walk is set up, starting at `dir` (default `"./"`) and
    /// filtered by `ext` when one is given.
    pub(crate) fn new(args: &Args) -> FileNames {
        if let Some(single) = args.file_name.as_ref() {
            return FileNames::Single(SingleFileName::new(single.clone()));
        }

        let root = args.dir.clone().unwrap_or_else(|| "./".to_string());
        let filter = match args.ext.as_ref() {
            None => FileFilter::None,
            Some(ext) => FileFilter::extension(ext),
        };

        let walker = MultipleFileNames {
            cur_dir: Some(root),
            filter,
            all_dirs: Vec::new(),
            cur_dir_files: Vec::new(),
            recursive: args.recursive,
        };
        FileNames::Multiple(walker)
    }
}
impl Iterator for FileNames {
type Item = String;
fn next(&mut self) -> Option<Self::Item> {
match self {
FileNames::Single(file_name) => file_name.file_name.take(),
FileNames::Multiple(file_names) => file_names.next(),
}
}
}

View File

@ -1,54 +1,23 @@
use argh::FromArgs;
use filters::FileFilter;
use fixer::Fixer;
use stats::{FileNames, FileStats, FileStatsAggregate, LineSep};
use iter::FileNames;
use stats::{FileStats, FileStatsAggregate, LineSep};
mod filters;
mod fixer;
mod iter;
mod stats;
/// Prints line-ending statistics and, when `fix` is set, rewrites the files
/// to the requested or most common line ending.
fn main() {
    let args: Args = argh::from_env();
    let filter = match args.ext {
        None => FileFilter::None,
        Some(ext) => FileFilter::extension(&ext),
    };

    let stats = FileNames::generate(args.dir.clone(), filter.clone(), args.recursive)
        .filter_map(FileStats::generate)
        .fold(FileStatsAggregate::new(), FileStatsAggregate::fold);
    stats.print_table();

    if !args.fix {
        return;
    }

    // An explicit target wins; otherwise fall back to the most common ending.
    let target = if let Some(requested) = args.target {
        println!("Fixing line endings to provided line ending - {:}", requested);
        requested
    } else if let Some(dominant) = stats.max() {
        println!("Fixing line endings to most common line ending - {:}", dominant);
        dominant
    } else {
        panic!("No line endings found to fix")
    };

    for file_name in FileNames::generate(args.dir, filter, args.recursive) {
        let mut fixer = Fixer::new(file_name, target.clone());
        fixer.fix();
    }
}
/// Line endings fixer tool
#[derive(FromArgs)]
struct Args {
/// directory to search for files in
#[argh(positional)]
#[argh(option, short = 'd')]
dir: Option<String>,
/// specific file to evaluate. All other options are ignored
#[argh(positional)]
file_name: Option<String>,
/// extension to filter files by : Ex: -e .txt
#[argh(option, short = 'e')]
ext: Option<String>,
@ -57,20 +26,60 @@ struct Args {
#[argh(switch, short = 'r')]
recursive: bool,
/// fix line endings
#[argh(switch, short = 'f')]
fix: bool,
/// target line ending, applicable only with -f
#[argh(option, short = 't', from_str_fn(parse_line_sep))]
target: Option<LineSep>,
/// normalize line endings
#[argh(option, short = 'n', from_str_fn(parse_norm_option))]
normalize: Option<NormalizeOption>,
}
fn parse_line_sep(s: &str) -> Result<LineSep, String> {
/// Target selection for the `normalize` option of `Args`.
enum NormalizeOption {
/// normalize to the most frequent line ending
/// across all files matching the filter.
MostFrequent,
/// normalize to lf
Lf,
/// normalize to crlf
CrLf,
/// normalize to cr
Cr,
}
/// Prints line-ending statistics and, when a normalize option was given,
/// rewrites every selected file to the chosen line ending.
fn main() {
    let args: Args = argh::from_env();

    let stats = FileNames::new(&args)
        .filter_map(FileStats::generate)
        .fold(FileStatsAggregate::new(), FileStatsAggregate::fold);
    stats.print_table();

    let normalize_option = match &args.normalize {
        Some(normalize_option) => normalize_option,
        None => return,
    };

    let target = match normalize_option {
        NormalizeOption::MostFrequent => {
            if let Some(dominant) = stats.max() {
                dominant
            } else {
                println!("Target line ending could not be determined. Skipping normalization");
                return;
            }
        }
        NormalizeOption::Cr => LineSep::Cr,
        NormalizeOption::CrLf => LineSep::CrLf,
        NormalizeOption::Lf => LineSep::Lf,
    };

    // Second pass over the same selection, this time rewriting each file.
    for file_name in FileNames::new(&args) {
        let mut fixer = Fixer::new(file_name, target.clone());
        fixer.fix();
    }
}
fn parse_norm_option(s: &str) -> Result<NormalizeOption, String> {
match s {
"lf" => Ok(LineSep::Lf),
"crlf" => Ok(LineSep::CrLf),
"cr" => Ok(LineSep::Cr),
"lf" => Ok(NormalizeOption::Lf),
"crlf" => Ok(NormalizeOption::CrLf),
"cr" => Ok(NormalizeOption::Cr),
"any" => Ok(NormalizeOption::MostFrequent),
_ => Err("Invalid line ending".to_string()),
}
}

View File

@ -1,8 +1,15 @@
use std::fmt::Display;
use std::fs::{self, File, ReadDir};
use std::fs::File;
use std::io::Read;
use crate::filters::FileFilter;
/// Line-ending counts gathered for a single file.
pub(crate) struct FileStats {
/// Name of the file, as passed to `generate`.
name: String,
/// Total number of line separators recorded, of any kind.
lines: usize,
/// Number of `\r\n` separators.
crlf: usize,
/// Number of `\r` separators not followed by `\n`.
cr: usize,
/// Number of `\n` separators not preceded by `\r`.
lf: usize,
/// Dominant separator as last computed by `update`; `None` until one is recorded.
max: Option<LineSep>,
}
pub(crate) struct FileStatsAggregate {
stats: Vec<FileStats>,
@ -13,6 +20,91 @@ pub(crate) struct FileStatsAggregate {
lines: usize,
}
/// The kinds of line separator that are counted.
#[derive(Clone)]
pub(crate) enum LineSep {
/// A lone `\n`.
Lf,
/// The two-byte sequence `\r\n`.
CrLf,
/// A lone `\r`.
Cr,
}
impl FileStats {
    /// Creates an empty record for `name` with every counter at zero.
    fn new(name: &str) -> FileStats {
        FileStats {
            name: name.to_owned(),
            lines: 0,
            crlf: 0,
            cr: 0,
            lf: 0,
            max: None,
        }
    }

    /// Reads `file_name` fully and counts its line separators.
    ///
    /// Returns `None` (after printing a message) if the file cannot be opened
    /// or read.
    pub(crate) fn generate(file_name: String) -> Option<FileStats> {
        let mut handle = match File::open(&file_name) {
            Ok(handle) => handle,
            Err(_) => {
                println!("Could not open file: {}", file_name);
                return None;
            }
        };

        let mut contents = Vec::with_capacity(1024 * 1024);
        if handle.read_to_end(&mut contents).is_err() {
            println!("Could not read file: {}", file_name);
            return None;
        }

        let mut stats = FileStats::new(&file_name);
        let mut bytes = contents.iter().copied().peekable();
        while let Some(byte) = bytes.next() {
            match byte {
                b'\r' => {
                    // A '\n' directly after '\r' belongs to the same separator.
                    if bytes.peek() == Some(&b'\n') {
                        bytes.next();
                        stats.update(LineSep::CrLf);
                    } else {
                        stats.update(LineSep::Cr);
                    }
                }
                b'\n' => stats.update(LineSep::Lf),
                _ => {}
            }
        }
        Some(stats)
    }

    /// Records one separator and recomputes the dominant kind.
    ///
    /// `max` is set only when one count is strictly greater than both others;
    /// ties leave it as `None`.
    fn update(&mut self, line: LineSep) {
        self.lines += 1;
        match line {
            LineSep::Lf => self.lf += 1,
            LineSep::Cr => self.cr += 1,
            LineSep::CrLf => self.crlf += 1,
        }

        let (crlf, cr, lf) = (self.crlf, self.cr, self.lf);
        self.max = if crlf > lf && crlf > cr {
            Some(LineSep::CrLf)
        } else if lf > cr && lf > crlf {
            Some(LineSep::Lf)
        } else if cr > lf && cr > crlf {
            Some(LineSep::Cr)
        } else {
            None
        };
    }
}
impl FileStatsAggregate {
pub(crate) fn new() -> Self {
FileStatsAggregate {
@ -82,99 +174,6 @@ impl FileStatsAggregate {
}
}
/// Line-ending counts gathered for a single file.
pub(crate) struct FileStats {
/// Name of the file, as passed to `generate`.
name: String,
/// Total number of line separators recorded, of any kind.
lines: usize,
/// Number of `\r\n` separators.
crlf: usize,
/// Number of `\r` separators not followed by `\n`.
cr: usize,
/// Number of `\n` separators not preceded by `\r`.
lf: usize,
/// Dominant separator as last computed by `update`; `None` until one is recorded.
max: Option<LineSep>,
}
impl FileStats {
    /// Creates an empty record for `name` with every counter at zero.
    fn new(name: &str) -> FileStats {
        FileStats {
            name: name.to_string(),
            lines: 0,
            crlf: 0,
            cr: 0,
            lf: 0,
            max: None,
        }
    }

    /// Reads `file_name` fully and counts its line separators.
    ///
    /// Returns `None` (after printing a message) if the file cannot be opened
    /// or read.
    pub(crate) fn generate(file_name: String) -> Option<FileStats> {
        let mut file = match File::open(&file_name) {
            Ok(file) => file,
            Err(_) => {
                println!("Could not open file: {}", file_name);
                return None;
            }
        };

        let mut buf = Vec::new();
        if file.read_to_end(&mut buf).is_err() {
            println!("Could not read file: {}", file_name);
            return None;
        }

        let mut stats = FileStats::new(&file_name);
        let mut i = 0;
        while i < buf.len() {
            match buf[i] {
                b'\r' => {
                    // Look one byte ahead: "\r\n" is a single separator.
                    if buf.get(i + 1) == Some(&b'\n') {
                        stats.update(LineSep::CrLf);
                        i += 1;
                    } else {
                        stats.update(LineSep::Cr);
                    }
                }
                b'\n' => stats.update(LineSep::Lf),
                _ => {}
            }
            i += 1;
        }
        Some(stats)
    }

    /// Records one separator and recomputes the dominant kind.
    ///
    /// `max` is set only when one count is strictly greater than both others.
    /// A tie yields `None` rather than defaulting to `Cr`, which would
    /// misreport files that contain no `\r` separators at all.
    fn update(&mut self, line: LineSep) {
        self.lines += 1;
        match line {
            LineSep::CrLf => self.crlf += 1,
            LineSep::Cr => self.cr += 1,
            LineSep::Lf => self.lf += 1,
        }

        self.max = if self.crlf > self.lf && self.crlf > self.cr {
            Some(LineSep::CrLf)
        } else if self.lf > self.cr && self.lf > self.crlf {
            Some(LineSep::Lf)
        } else if self.cr > self.lf && self.cr > self.crlf {
            Some(LineSep::Cr)
        } else {
            None
        };
    }
}
/// The kinds of line separator that are counted.
#[derive(Clone)]
pub(crate) enum LineSep {
/// A lone `\n`.
Lf,
/// The two-byte sequence `\r\n`.
CrLf,
/// A lone `\r`.
Cr,
}
impl Display for LineSep {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -184,107 +183,3 @@ impl Display for LineSep {
}
}
}
/// Walks directories and yields the paths of files accepted by `filter`.
pub(crate) struct FileNames {
/// Starting directory; moved onto `all_dirs` by the first call to `next`.
cur_dir: Option<String>,
/// Applied to each regular file's path to decide whether it is yielded.
filter: FileFilter,
/// Directories still to be read; used as a stack (push/pop).
all_dirs: Vec<String>,
/// Accepted files from the most recently read directory, not yet yielded.
cur_dir_files: Vec<String>,
/// When true, subdirectories found while reading are pushed onto `all_dirs`.
recursive: bool,
}
impl FileNames {
    /// Sets up a directory walk starting at `dir` (default `"./"`).
    ///
    /// `pattern` decides which files are yielded; `recursive` controls whether
    /// subdirectories are visited.
    pub(crate) fn generate(dir: Option<String>, pattern: FileFilter, recursive: bool) -> FileNames {
        let cur_dir = dir.unwrap_or_else(|| "./".to_string());
        FileNames {
            cur_dir: Some(cur_dir),
            filter: pattern,
            all_dirs: Vec::new(),
            cur_dir_files: Vec::new(),
            recursive,
        }
    }

    /// Pops pending directories until one can be opened.
    ///
    /// Directories that cannot be read are reported on stdout and skipped.
    /// Returns `None` once `all_dirs` is exhausted.
    fn get_next_dir(&mut self) -> Option<ReadDir> {
        while let Some(dir) = self.all_dirs.pop() {
            match fs::read_dir(&dir) {
                Ok(entries) => return Some(entries),
                Err(_) => println!("Could not read dir: {}", dir),
            }
        }
        None
    }

    /// Returns `path` as an owned UTF-8 string.
    ///
    /// Paths that are not valid UTF-8 are reported on stdout and yield `None`
    /// instead of panicking.
    fn utf8_name(path: &std::path::Path) -> Option<String> {
        let name = path.to_str().map(str::to_string);
        if name.is_none() {
            println!("Skipping {}, path is not valid UTF-8", path.display());
        }
        name
    }

    /// Ensures `cur_dir_files` has been refilled from the next readable directory.
    ///
    /// Returns `true` if files are already pending or a directory was read
    /// (even if it contributed no files), and `false` when no directories remain.
    fn populate_files(&mut self) -> bool {
        if !self.cur_dir_files.is_empty() {
            return true;
        }

        let next_dir = match self.get_next_dir() {
            Some(next_dir) => next_dir,
            None => return false,
        };

        for entry in next_dir {
            let entry = match entry {
                Ok(entry) => entry,
                Err(err) => {
                    // A failed entry carries no path, so report the error itself;
                    // unwrapping it here would panic.
                    println!("Could not read directory entry ({}), skipping", err);
                    continue;
                }
            };

            let path = entry.path();
            if path.is_dir() {
                if self.recursive {
                    if let Some(name) = Self::utf8_name(&path) {
                        self.all_dirs.push(name);
                    }
                }
            } else if path.is_file() {
                if let Some(name) = Self::utf8_name(&path) {
                    if self.filter.apply(name.as_str()) {
                        self.cur_dir_files.push(name);
                    }
                }
            } else {
                println!("Skipping {}, unknown file type", path.display());
            }
        }
        true
    }
}
impl Iterator for FileNames {
    type Item = String;

    /// Yields the next accepted file path, reading further directories on demand.
    fn next(&mut self) -> Option<Self::Item> {
        // On the first call the starting directory is still parked in `cur_dir`.
        if let Some(root) = self.cur_dir.take() {
            self.all_dirs.push(root);
        }

        loop {
            if let Some(name) = self.cur_dir_files.pop() {
                return Some(name);
            }
            // Nothing pending: read another directory, or stop when none remain.
            if !self.populate_files() {
                return None;
            }
        }
    }
}