From: Scott Worley Date: Thu, 29 Aug 2024 11:09:56 +0000 (-0700) Subject: Don't be O(n^2) unnecessarily X-Git-Tag: v0.3.0~17 X-Git-Url: http://git.scottworley.com/tablify/commitdiff_plain/88a08162523cf4a0a7c1286b1438fdf16ca6c802?ds=inline Don't be O(n^2) unnecessarily Use a HashMap to store a row's entries. The InputLine refactor got tangled up in this too. --- diff --git a/Changelog b/Changelog index a0f0693..33d7162 100644 --- a/Changelog +++ b/Changelog @@ -1,6 +1,7 @@ ## [Unreleased] - Center text in each cell - Escape HTML characters properly +- Fix an unnecessary O(n^2)ism ## [0.2.1] - 2024-08-20 - A little more space up top diff --git a/src/lib.rs b/src/lib.rs index 7292f02..b406f95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ -use std::collections::{HashMap, HashSet}; +use std::borrow::ToOwned; +use std::collections::HashMap; use std::fmt::Write; use std::io::BufRead; use std::iter::Iterator; @@ -75,37 +76,39 @@ impl std::fmt::Display for HTML { } } -#[derive(Debug, PartialEq, Eq, Hash)] -struct Entry<'a> { - col: &'a str, - instance: Option<&'a str>, +#[derive(Debug, PartialEq, Eq)] +enum InputLine<'a> { + Blank, + RowHeader(&'a str), + Entry(&'a str, Option<&'a str>), } -impl<'a> From<&'a str> for Entry<'a> { - fn from(value: &'a str) -> Entry<'a> { - match value.split_once(':') { - None => Entry { - col: value, - instance: None, - }, - Some((col, instance)) => Entry { - col: col.trim(), - instance: Some(instance.trim()), - }, +impl<'a> From<&'a str> for InputLine<'a> { + fn from(value: &'a str) -> InputLine<'a> { + let trimmed = value.trim_end(); + if trimmed.is_empty() { + InputLine::Blank + } else if !trimmed.starts_with(' ') { + InputLine::RowHeader(value.trim()) + } else { + match value.split_once(':') { + None => InputLine::Entry(value.trim(), None), + Some((col, instance)) => InputLine::Entry(col.trim(), Some(instance.trim())), + } } } } #[derive(Debug, PartialEq, Eq)] -struct RowInput<'a> { - label: &'a str, - entries: Vec>, +struct Row { + label: String, + entries: HashMap>>, } -struct Reader<'a, Input: Iterator>> { +struct Reader>> { input: std::iter::Enumerate, - row: Option>, + row: Option, } -impl<'a, Input: Iterator>> Reader<'a, Input> { +impl>> Reader { fn new(input: Input) -> Self { Self { input: input.enumerate(), @@ -113,61 +116,58 @@ impl<'a, Input: Iterator>> Reader<'a, Inpu } } } -impl<'a, Input: Iterator>> Iterator for Reader<'a, Input> { - type Item = Result, std::io::Error>; +impl>> Iterator for Reader { + type Item = Result; fn next(&mut self) -> Option { loop { match self.input.next() { None => return Ok(std::mem::take(&mut self.row)).transpose(), Some((_, Err(e))) => return Some(Err(e)), - Some((_, Ok(line))) if line.trim_end().is_empty() && self.row.is_some() => { - return Ok(std::mem::take(&mut self.row)).transpose() - } - Some((_, Ok(line))) if line.trim_end().is_empty() => {} - Some((n, Ok(line))) if line.starts_with(' ') => match &mut self.row { - None => { - return Some(Err(std::io::Error::other(format!( - "{}: Entry with no header", - n + 1 - )))) + Some((n, Ok(line))) => match InputLine::from(line.as_ref()) { + InputLine::Blank if self.row.is_some() => { + return Ok(std::mem::take(&mut self.row)).transpose() } - // TODO: Don't leak - Some(ref mut row) => row.entries.push(Entry::from(line.leak().trim())), - }, - Some((_, Ok(line))) => { - let prev = std::mem::take(&mut self.row); - self.row = Some(RowInput { - // TODO: Don't leak - label: line.leak().trim_end(), - entries: vec![], - }); - if prev.is_some() { - return Ok(prev).transpose(); + InputLine::Blank => {} + InputLine::Entry(col, instance) => match &mut self.row { + None => { + return Some(Err(std::io::Error::other(format!( + "{}: Entry with no header", + n + 1 + )))) + } + Some(ref mut row) => { + row.entries + .entry(col.to_owned()) + .and_modify(|is| is.push(instance.map(ToOwned::to_owned))) + .or_insert_with(|| vec![instance.map(ToOwned::to_owned)]); + } + }, + InputLine::RowHeader(row) => { + let prev = std::mem::take(&mut self.row); + self.row = Some(Row { + label: row.to_owned(), + entries: HashMap::new(), + }); + if prev.is_some() { + return Ok(prev).transpose(); + } } - } + }, } } } } -fn read_rows( - input: impl std::io::Read, -) -> impl Iterator, std::io::Error>> { +fn read_rows(input: impl std::io::Read) -> impl Iterator> { Reader::new(std::io::BufReader::new(input).lines()) } -fn column_counts(rows: &[RowInput]) -> Vec<(usize, String)> { +fn column_counts(rows: &[Row]) -> Vec<(usize, String)> { let mut counts: Vec<_> = rows .iter() - .flat_map(|r| { - r.entries - .iter() - .map(|e| &e.col) - .collect::>() - .into_iter() - }) + .flat_map(|r| r.entries.keys()) .fold(HashMap::new(), |mut cs, col| { - cs.entry(String::from(*col)) + cs.entry(col.to_owned()) .and_modify(|n| *n += 1) .or_insert(1); cs @@ -178,35 +178,39 @@ fn column_counts(rows: &[RowInput]) -> Vec<(usize, String)> { counts.sort_unstable_by(|(an, acol), (bn, bcol)| bn.cmp(an).then(acol.cmp(bcol))); counts } -fn column_order(rows: &[RowInput]) -> Vec { +fn column_order(rows: &[Row]) -> Vec { column_counts(rows) .into_iter() .map(|(_, col)| col) .collect() } -fn render_instance(entry: &Entry) -> HTML { - match &entry.instance { +fn render_instance(instance: &Option) -> HTML { + match instance { None => HTML::from("✓"), Some(instance) => HTML::escape(instance.as_ref()), } } -fn render_cell(col: &str, row: &RowInput) -> HTML { +fn render_cell(col: &str, row: &Row) -> HTML { let row_label = HTML::escape(row.label.as_ref()); let col_label = HTML::escape(col); - let entries: Vec<&Entry> = row.entries.iter().filter(|e| e.col == col).collect(); - let class = HTML::from(if entries.is_empty() { "" } else { "yes" }); - let all_empty = entries.iter().all(|e| e.instance.is_none()); - let contents = if entries.is_empty() || (all_empty && entries.len() == 1) { + let instances: Option<&Vec>> = row.entries.get(col); + let class = HTML::from(if instances.is_none() { "" } else { "yes" }); + let all_empty = instances + .iter() + .flat_map(|is| is.iter()) + .all(Option::is_none); + let contents = if instances.is_none() || (all_empty && instances.unwrap().len() == 1) { HTML::from("") } else if all_empty { - HTML(format!("{}", entries.len())) + HTML(format!("{}", instances.unwrap().len())) } else { HTML( - entries + instances + .unwrap() .iter() - .map(|i| render_instance(i)) + .map(render_instance) .map(|html| html.0) // Waiting for slice_concat_trait to stabilize .collect::>() .join(" "), @@ -215,8 +219,7 @@ fn render_cell(col: &str, row: &RowInput) -> HTML { HTML(format!("{contents}")) } -fn render_row(columns: &[String], row: &RowInput) -> HTML { - // This is O(n^2) & doesn't need to be +fn render_row(columns: &[String], row: &Row) -> HTML { let row_label = HTML::escape(row.label.as_ref()); HTML(format!( "{row_label}{}\n", @@ -266,27 +269,31 @@ mod tests { use super::*; #[test] - fn test_parse_entry() { + fn test_parse_line() { + assert_eq!(InputLine::from(""), InputLine::Blank); + assert_eq!(InputLine::from(" "), InputLine::Blank); + assert_eq!(InputLine::from("foo"), InputLine::RowHeader("foo")); + assert_eq!(InputLine::from("foo "), InputLine::RowHeader("foo")); + assert_eq!(InputLine::from(" foo"), InputLine::Entry("foo", None)); assert_eq!( - Entry::from("foo"), - Entry { - col: "foo", - instance: None - } + InputLine::from(" foo:bar"), + InputLine::Entry("foo", Some("bar")) ); assert_eq!( - Entry::from("foo:bar"), - Entry { - col: "foo", - instance: Some("bar") - } + InputLine::from(" foo: bar"), + InputLine::Entry("foo", Some("bar")) ); assert_eq!( - Entry::from("foo: bar"), - Entry { - col: "foo", - instance: Some("bar") - } + InputLine::from(" foo: bar "), + InputLine::Entry("foo", Some("bar")) + ); + assert_eq!( + InputLine::from(" foo: bar "), + InputLine::Entry("foo", Some("bar")) + ); + assert_eq!( + InputLine::from(" foo : bar "), + InputLine::Entry("foo", Some("bar")) ); } @@ -294,45 +301,48 @@ mod tests { fn test_read_rows() { assert_eq!( read_rows(&b"foo"[..]).flatten().collect::>(), - vec![RowInput { - label: "foo", - entries: vec![] + vec![Row { + label: "foo".to_owned(), + entries: HashMap::new(), }] ); assert_eq!( read_rows(&b"bar"[..]).flatten().collect::>(), - vec![RowInput { - label: "bar", - entries: vec![] + vec![Row { + label: "bar".to_owned(), + entries: HashMap::new(), }] ); assert_eq!( read_rows(&b"foo\nbar\n"[..]).flatten().collect::>(), vec![ - RowInput { - label: "foo", - entries: vec![] + Row { + label: "foo".to_owned(), + entries: HashMap::new(), }, - RowInput { - label: "bar", - entries: vec![] + Row { + label: "bar".to_owned(), + entries: HashMap::new(), } ] ); assert_eq!( read_rows(&b"foo\n bar\n"[..]).flatten().collect::>(), - vec![RowInput { - label: "foo", - entries: vec![Entry::from("bar")] + vec![Row { + label: "foo".to_owned(), + entries: HashMap::from([("bar".to_owned(), vec![None])]), }] ); assert_eq!( read_rows(&b"foo\n bar\n baz\n"[..]) .flatten() .collect::>(), - vec![RowInput { - label: "foo", - entries: vec![Entry::from("bar"), Entry::from("baz")] + vec![Row { + label: "foo".to_owned(), + entries: HashMap::from([ + ("bar".to_owned(), vec![None]), + ("baz".to_owned(), vec![None]) + ]), }] ); assert_eq!( @@ -340,13 +350,13 @@ mod tests { .flatten() .collect::>(), vec![ - RowInput { - label: "foo", - entries: vec![] + Row { + label: "foo".to_owned(), + entries: HashMap::new(), }, - RowInput { - label: "bar", - entries: vec![] + Row { + label: "bar".to_owned(), + entries: HashMap::new(), } ] ); @@ -355,13 +365,13 @@ mod tests { .flatten() .collect::>(), vec![ - RowInput { - label: "foo", - entries: vec![] + Row { + label: "foo".to_owned(), + entries: HashMap::new(), }, - RowInput { - label: "bar", - entries: vec![] + Row { + label: "bar".to_owned(), + entries: HashMap::new(), } ] ); @@ -369,9 +379,9 @@ mod tests { read_rows(&b"foo \n bar \n"[..]) .flatten() .collect::>(), - vec![RowInput { - label: "foo", - entries: vec![Entry::from("bar")] + vec![Row { + label: "foo".to_owned(), + entries: HashMap::from([("bar".to_owned(), vec![None])]), }] ); @@ -425,9 +435,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "nope", - entries: vec![] + &Row { + label: "nope".to_owned(), + entries: HashMap::new(), } ), HTML::from("") @@ -435,9 +445,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "nope", - entries: vec![Entry::from("bar")] + &Row { + label: "nope".to_owned(), + entries: HashMap::from([("bar".to_owned(), vec![None])]), } ), HTML::from("") @@ -445,9 +455,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "nope", - entries: vec![Entry::from("foo")] + &Row { + label: "nope".to_owned(), + entries: HashMap::from([("foo".to_owned(), vec![None])]), } ), HTML::from("") @@ -455,9 +465,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "nope", - entries: vec![Entry::from("foo"), Entry::from("foo")] + &Row { + label: "nope".to_owned(), + entries: HashMap::from([("foo".to_owned(), vec![None, None])]), } ), HTML::from("2") @@ -465,9 +475,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "nope", - entries: vec![Entry::from("foo: 5"), Entry::from("foo: 10")] + &Row { + label: "nope".to_owned(), + entries: HashMap::from([("foo".to_owned(), vec![Some("5".to_owned()), Some("10".to_owned())])]), } ), HTML::from("5 10") @@ -475,9 +485,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "nope", - entries: vec![Entry::from("foo: 5"), Entry::from("foo")] + &Row { + label: "nope".to_owned(), + entries: HashMap::from([("foo".to_owned(), vec![Some("5".to_owned()), None])]), } ), HTML::from("5 ✓") @@ -485,9 +495,9 @@ mod tests { assert_eq!( render_cell( "heart", - &RowInput { - label: "nope", - entries: vec![Entry::from("heart: <3")] + &Row { + label: "nope".to_owned(), + entries: HashMap::from([("heart".to_owned(), vec![Some("<3".to_owned())])]), } ), HTML::from("<3") @@ -495,9 +505,9 @@ mod tests { assert_eq!( render_cell( "foo", - &RowInput { - label: "bob's", - entries: vec![Entry::from("foo")] + &Row { + label: "bob's".to_owned(), + entries: HashMap::from([("foo".to_owned(), vec![None])]), } ), HTML::from("")