Add parser
This commit is contained in:
commit
2478032e61
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
1594
Cargo.lock
generated
Normal file
1594
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
10
Cargo.toml
Normal file
10
Cargo.toml
Normal file
@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "trs"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
hyper = "1.6.0"
|
||||
reqwest = "0.12.20"
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
xml-rs = "0.8.26"
|
12
docs/design.markdown
Normal file
12
docs/design.markdown
Normal file
@ -0,0 +1,12 @@
|
||||
# Terminal RSS
|
||||
|
||||
|
||||
Keep the goal simple.
|
||||
|
||||
- Just list the unread articles
|
||||
- Have a way to mark an article as read/unread
|
||||
https://www.rssboard.org/rss-specification
|
||||
|
||||
https://brycev.com/rss.xml
|
||||
|
||||
|
2373
sample/rss.xml
Normal file
2373
sample/rss.xml
Normal file
File diff suppressed because it is too large
Load Diff
2754
sample/rss2.xml
Normal file
2754
sample/rss2.xml
Normal file
File diff suppressed because it is too large
Load Diff
22
src/error.rs
Normal file
22
src/error.rs
Normal file
@ -0,0 +1,22 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use xml::reader::Error;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TrsError {
|
||||
XmlParseError(String),
|
||||
XmlRsError(Error),
|
||||
}
|
||||
|
||||
impl Display for TrsError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
match self {
|
||||
TrsError::XmlParseError(msg) => format!("XML Parse Error: {}", msg),
|
||||
TrsError::XmlRsError(err) => format!("XML Reader Error: {}", err),
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
26
src/main.rs
Normal file
26
src/main.rs
Normal file
@ -0,0 +1,26 @@
|
||||
use error::TrsError;
|
||||
use xml::ParserConfig;
|
||||
pub mod error;
|
||||
pub mod parser;
|
||||
|
||||
fn main() -> Result<(), TrsError> {
|
||||
let bytes = include_bytes!("../sample/rss2.xml");
|
||||
let xml_source_stream = ParserConfig::new()
|
||||
.ignore_invalid_encoding_declarations(true)
|
||||
.create_reader(&bytes[..]);
|
||||
let rss_channel = parser::parse_rss_channel(xml_source_stream)?;
|
||||
|
||||
println!("{}", rss_channel.title);
|
||||
println!("{}", rss_channel.link);
|
||||
println!("{}", rss_channel.description);
|
||||
for article in &rss_channel.articles {
|
||||
println!("{} {:^50} {:<}", article.date, article.title, article.link);
|
||||
}
|
||||
|
||||
println!(
|
||||
"There are {} articles in the channel.",
|
||||
rss_channel.articles.len()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
221
src/parser.rs
Normal file
221
src/parser.rs
Normal file
@ -0,0 +1,221 @@
|
||||
use std::io::Read;
|
||||
|
||||
use xml::{reader::XmlEvent, EventReader};
|
||||
|
||||
use crate::error::TrsError;
|
||||
|
||||
pub struct RssChannel {
|
||||
pub title: String,
|
||||
pub link: String,
|
||||
pub description: String,
|
||||
pub articles: Vec<Article>,
|
||||
}
|
||||
|
||||
impl RssChannel {
|
||||
fn new() -> Self {
|
||||
RssChannel {
|
||||
title: String::new(),
|
||||
link: String::new(),
|
||||
description: String::new(),
|
||||
articles: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn update_channel_field(&mut self, field: &XmlTagField, value: String) -> Result<(), TrsError> {
|
||||
let last_article = self.articles.last_mut();
|
||||
let no_item_error = || {
|
||||
TrsError::XmlParseError(format!(
|
||||
"No item found to update field <{}>",
|
||||
field.hierarchical_tag
|
||||
))
|
||||
};
|
||||
|
||||
match field.field {
|
||||
XmlField::ArticleTitle => self.title = value,
|
||||
XmlField::ArticleLink => self.link = value,
|
||||
XmlField::ArticleDescription => self.description = value,
|
||||
XmlField::ItemTitle => last_article.ok_or_else(no_item_error)?.title = value,
|
||||
XmlField::ItemLink => last_article.ok_or_else(no_item_error)?.link = value,
|
||||
XmlField::ItemPubDate => last_article.ok_or_else(no_item_error)?.date = value,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A single feed entry with its title, link and publication date.
///
/// All three fields are plain text; the date is stored as a string and is not
/// parsed into a date type.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Article {
    pub title: String,
    pub link: String,
    pub date: String,
}

impl Article {
    /// Creates an article whose fields are all empty strings.
    fn new() -> Self {
        // The derived `Default` already yields empty strings for every field.
        Self::default()
    }
}
|
||||
|
||||
/// Identifies which stored text field a recognised XML element feeds.
enum XmlField {
    ItemTitle,
    ItemLink,
    ItemPubDate,
    ArticleTitle,
    ArticleLink,
    ArticleDescription,
}

/// One row of the tag-to-field table.
struct XmlTagField {
    /// Lookup key: the tag name, prefixed with `"item > "` for item children.
    hierarchical_tag: &'static str,
    /// The bare element name, without any prefix.
    tag: &'static str,
    /// The field this element's text is stored in.
    field: XmlField,
}

impl XmlTagField {
    /// Builds a table row; `const` so it can be used in `FIELD_TAG_MAPPINGS`.
    const fn mapping(hierarchical_tag: &'static str, tag: &'static str, field: XmlField) -> Self {
        Self {
            field,
            tag,
            hierarchical_tag,
        }
    }

    /// Looks up the table row whose `hierarchical_tag` equals the argument
    /// exactly; returns `None` for tags that are not in the table.
    fn corresponding_field(hierarchical_tag: &str) -> Option<&'static XmlTagField> {
        FIELD_TAG_MAPPINGS
            .iter()
            .find(|candidate| candidate.hierarchical_tag == hierarchical_tag)
    }
}

/// Every element the parser stores text from: three top-level tags followed
/// by three `item` children.
const FIELD_TAG_MAPPINGS: [XmlTagField; 6] = [
    XmlTagField::mapping("title", "title", XmlField::ArticleTitle),
    XmlTagField::mapping("link", "link", XmlField::ArticleLink),
    XmlTagField::mapping("description", "description", XmlField::ArticleDescription),
    XmlTagField::mapping("item > title", "title", XmlField::ItemTitle),
    XmlTagField::mapping("item > link", "link", XmlField::ItemLink),
    XmlTagField::mapping("item > pubDate", "pubDate", XmlField::ItemPubDate),
];
|
||||
|
||||
pub fn parse_rss_channel<R: Read>(
|
||||
xml_source_stream: EventReader<R>,
|
||||
) -> Result<RssChannel, TrsError> {
|
||||
let mut channel = RssChannel::new();
|
||||
let mut tag_prefix = "";
|
||||
let mut current_field: Option<&XmlTagField> = None;
|
||||
for e in xml_source_stream {
|
||||
match e {
|
||||
Ok(XmlEvent::StartElement { name, .. }) => match name.local_name.as_str() {
|
||||
"item" => {
|
||||
tag_prefix = "item > ";
|
||||
channel.articles.push(Article::new());
|
||||
}
|
||||
tag => {
|
||||
let None = current_field else {
|
||||
let current_field_name = current_field.unwrap();
|
||||
return Err(TrsError::XmlParseError(format!(
|
||||
"Unexpected <{}> start tag without closing existing tag <{}>",
|
||||
tag, current_field_name.hierarchical_tag
|
||||
)));
|
||||
};
|
||||
|
||||
let tag_name_with_prefix = format!("{}{}", tag_prefix, tag);
|
||||
current_field = XmlTagField::corresponding_field(&tag_name_with_prefix);
|
||||
}
|
||||
},
|
||||
Ok(XmlEvent::EndElement { name }) => match name.local_name.as_str() {
|
||||
"item" => {
|
||||
let None = current_field else {
|
||||
let current_field_name = current_field.unwrap();
|
||||
return Err(TrsError::XmlParseError(format!(
|
||||
"Unexpected </item> end tag without closing field {}",
|
||||
current_field_name.hierarchical_tag
|
||||
)));
|
||||
};
|
||||
tag_prefix = "";
|
||||
}
|
||||
tag => {
|
||||
if let Some(field) = current_field.take() {
|
||||
if field.tag == tag {
|
||||
current_field = None;
|
||||
} else {
|
||||
return Err(TrsError::XmlParseError(format!(
|
||||
"Unexpected </{}> end tag, expected </{}>",
|
||||
tag, field.hierarchical_tag
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Ok(XmlEvent::Characters(data)) => {
|
||||
if let Some(field) = current_field {
|
||||
let err = channel.update_channel_field(field, data);
|
||||
if let Err(e) = err {
|
||||
eprintln!("Error updating channel field: {}", e);
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing XML: {}", e);
|
||||
return Err(TrsError::XmlRsError(e));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(channel)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use xml::ParserConfig;

    /// Parses `bytes` as a feed and checks the channel header, the number of
    /// articles, and that every article has a non-empty title, link and date.
    fn assert_sample(
        bytes: &[u8],
        title: &str,
        link: &str,
        description: &str,
        article_count: usize,
    ) {
        let reader = ParserConfig::new()
            .ignore_invalid_encoding_declarations(true)
            .create_reader(bytes);
        let rss_channel = parse_rss_channel(reader).unwrap();

        assert_eq!(rss_channel.title, title);
        assert_eq!(rss_channel.link, link);
        assert_eq!(rss_channel.description, description);
        assert_eq!(rss_channel.articles.len(), article_count);
        for article in &rss_channel.articles {
            assert!(!article.title.is_empty());
            assert!(!article.link.is_empty());
            assert!(!article.date.is_empty());
        }
    }

    /// Generates one `#[test]` per sample file under `sample/`; a macro is
    /// needed because `include_bytes!` requires a compile-time path.
    macro_rules! validate_sample {
        ($test_name:ident, $file_name:literal, $title:literal, $link:literal, $description:literal, $article_count:literal) => {
            #[test]
            fn $test_name() {
                assert_sample(
                    include_bytes!(concat!("../sample/", $file_name)),
                    $title,
                    $link,
                    $description,
                    $article_count,
                );
            }
        };
    }

    validate_sample!(
        sample1,
        "rss.xml",
        "Bryce Vandegrift's Website",
        "https://brycev.com/",
        "Updates to Bryce Vandegrift's blog",
        28
    );

    validate_sample!(
        sample2,
        "rss2.xml",
        "ploeh blog",
        "https://blog.ploeh.dk",
        "danish software design",
        10
    );
}
|
Loading…
x
Reference in New Issue
Block a user