// bookdata/goodreads/author.rs
use parquet_derive::ParquetRecordWriter;
use serde::Deserialize;

use crate::arrow::*;
use crate::parsing::*;
use crate::prelude::*;

/// Name of the output file opened by `AuthorWriter::open` and reported by
/// `AuthorWriter::output_files`.
const OUT_FILE: &str = "gr-author-info.parquet";

11#[derive(Deserialize)]
13pub struct RawAuthor {
14 pub author_id: String,
15 pub name: String,
16}
17
18#[derive(ParquetRecordWriter)]
20pub struct AuthorRecord {
21 pub author_id: i32,
22 pub name: Option<String>,
23}
24
25pub struct AuthorWriter {
27 writer: TableWriter<AuthorRecord>,
28 n_recs: usize,
29}
30
31impl AuthorWriter {
32 pub fn open() -> Result<AuthorWriter> {
34 let writer = TableWriter::open(OUT_FILE)?;
35 Ok(AuthorWriter { writer, n_recs: 0 })
36 }
37}
38
39impl DataSink for AuthorWriter {
40 fn output_files(&self) -> Vec<PathBuf> {
41 path_list(&[OUT_FILE])
42 }
43}
44
45impl ObjectWriter<RawAuthor> for AuthorWriter {
46 fn write_object(&mut self, row: RawAuthor) -> Result<()> {
47 let author_id: i32 = row.author_id.parse()?;
48
49 self.writer.write_object(AuthorRecord {
50 author_id,
51 name: trim_owned(&row.name),
52 })?;
53
54 self.n_recs += 1;
55 Ok(())
56 }
57
58 fn finish(self) -> Result<usize> {
59 self.writer.finish()?;
60 Ok(self.n_recs)
61 }
62}