git.scottworley.com Git - tablify/commitdiff
Proper log parsing
author Scott Worley <scottworley@scottworley.com>
Mon, 19 Aug 2024 08:38:40 +0000 (01:38 -0700)
committer Scott Worley <scottworley@scottworley.com>
Mon, 19 Aug 2024 08:53:37 +0000 (01:53 -0700)
I'm bummed that this couldn't be a simple Iterator::scan().  Scan
doesn't have a way to get control one last time at end-of-stream to dump
the accumulator state.

src/lib.rs

index 1d098b03d1d314ec7d6c4e94a72f2f7e6b5b94ce..b583c3eefb0b3e3f1ea74f0aae24556b711865a1 100644 (file)
@@ -9,12 +9,49 @@ struct RowInput {
     entries: Vec<String>,
 }
 
+struct Reader<Input: Iterator<Item = Result<String, std::io::Error>>> {
+    input: Input,
+    row: Option<RowInput>,
+}
+impl<Input: Iterator<Item = Result<String, std::io::Error>>> Reader<Input> {
+    #[cfg(test)]
+    fn new(input: Input) -> Self {
+        Self { input, row: None }
+    }
+}
+impl<Input: Iterator<Item = Result<String, std::io::Error>>> Iterator for Reader<Input> {
+    type Item = Result<RowInput, std::io::Error>;
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            match self.input.next() {
+                None => return Ok(std::mem::take(&mut self.row)).transpose(),
+                Some(Err(e)) => return Some(Err(e)),
+                Some(Ok(line)) if line.is_empty() && self.row.is_some() => {
+                    return Ok(std::mem::take(&mut self.row)).transpose()
+                }
+                Some(Ok(line)) if line.is_empty() => {}
+                Some(Ok(line)) if line.starts_with(' ') => match &mut self.row {
+                    None => return Some(Err(std::io::Error::other("Entry with no header"))),
+                    Some(ref mut row) => row.entries.push(String::from(line.trim())),
+                },
+                Some(Ok(line)) => {
+                    let prev = std::mem::take(&mut self.row);
+                    self.row = Some(RowInput {
+                        label: line,
+                        entries: vec![],
+                    });
+                    if prev.is_some() {
+                        return Ok(prev).transpose();
+                    }
+                }
+            }
+        }
+    }
+}
+
 #[cfg(test)]
-fn read_rows(input: impl std::io::Read) -> impl Iterator<Item = RowInput> {
-    std::io::BufReader::new(input).lines().map(|line| RowInput {
-        label: line.unwrap(),
-        entries: vec![],
-    })
+fn read_rows(input: impl std::io::Read) -> impl Iterator<Item = Result<RowInput, std::io::Error>> {
+    Reader::new(std::io::BufReader::new(input).lines())
 }
 
 pub fn tablify(_input: &impl std::io::Read) -> String {
@@ -28,21 +65,21 @@ mod tests {
     #[test]
     fn test_read_rows() {
         assert_eq!(
-            read_rows(&b"foo"[..]).collect::<Vec<_>>(),
+            read_rows(&b"foo"[..]).flatten().collect::<Vec<_>>(),
             vec![RowInput {
                 label: String::from("foo"),
                 entries: vec![]
             }]
         );
         assert_eq!(
-            read_rows(&b"bar"[..]).collect::<Vec<_>>(),
+            read_rows(&b"bar"[..]).flatten().collect::<Vec<_>>(),
             vec![RowInput {
                 label: String::from("bar"),
                 entries: vec![]
             }]
         );
         assert_eq!(
-            read_rows(&b"foo\nbar\n"[..]).collect::<Vec<_>>(),
+            read_rows(&b"foo\nbar\n"[..]).flatten().collect::<Vec<_>>(),
             vec![
                 RowInput {
                     label: String::from("foo"),
@@ -54,5 +91,44 @@ mod tests {
                 }
             ]
         );
+        assert_eq!(
+            read_rows(&b"foo\n bar\n"[..]).flatten().collect::<Vec<_>>(),
+            vec![RowInput {
+                label: String::from("foo"),
+                entries: vec![String::from("bar")]
+            }]
+        );
+        assert_eq!(
+            read_rows(&b"foo\n bar\n baz\n"[..])
+                .flatten()
+                .collect::<Vec<_>>(),
+            vec![RowInput {
+                label: String::from("foo"),
+                entries: vec![String::from("bar"), String::from("baz")]
+            }]
+        );
+        assert_eq!(
+            read_rows(&b"foo\n\nbar\n"[..])
+                .flatten()
+                .collect::<Vec<_>>(),
+            vec![
+                RowInput {
+                    label: String::from("foo"),
+                    entries: vec![]
+                },
+                RowInput {
+                    label: String::from("bar"),
+                    entries: vec![]
+                }
+            ]
+        );
+
+        let bad = read_rows(&b" foo"[..]).next().unwrap();
+        assert!(bad.is_err());
+        assert!(format!("{bad:?}").contains("Entry with no header"));
+
+        let bad2 = read_rows(&b"foo\n\n bar"[..]).nth(1).unwrap();
+        assert!(bad2.is_err());
+        assert!(format!("{bad2:?}").contains("Entry with no header"));
     }
 }