bookdata/goodreads/
work.rs1use parquet_derive::ParquetRecordWriter;
3use serde::Deserialize;
4
5use crate::arrow::*;
6use crate::parsing::*;
7use crate::prelude::*;
8
/// Name of the Parquet file that the work writer in this module creates
/// and reports as its output.
const OUT_FILE: &str = "gr-work-info.parquet";
10
11#[derive(Deserialize)]
13pub struct RawWork {
14 pub work_id: String,
15 #[serde(default)]
16 pub original_title: String,
17 #[serde(default)]
18 pub original_publication_year: String,
19 #[serde(default)]
20 pub original_publication_month: String,
21 #[serde(default)]
22 pub original_publication_day: String,
23}
24
25#[derive(ParquetRecordWriter)]
27pub struct WorkRecord {
28 pub work_id: i32,
29 pub title: Option<String>,
30 pub pub_year: Option<i16>,
31 pub pub_month: Option<u8>,
32}
33
34pub struct WorkWriter {
36 writer: TableWriter<WorkRecord>,
37 n_recs: usize,
38}
39
40impl WorkWriter {
41 pub fn open() -> Result<WorkWriter> {
43 let writer = TableWriter::open(OUT_FILE)?;
44 Ok(WorkWriter { writer, n_recs: 0 })
45 }
46}
47
48impl DataSink for WorkWriter {
49 fn output_files(&self) -> Vec<PathBuf> {
50 path_list(&[OUT_FILE])
51 }
52}
53
54impl ObjectWriter<RawWork> for WorkWriter {
55 fn write_object(&mut self, row: RawWork) -> Result<()> {
56 let work_id: i32 = row.work_id.parse()?;
57
58 let pub_year = parse_opt(&row.original_publication_year)?;
59 let pub_month = parse_opt(&row.original_publication_month)?;
60
61 self.writer.write_object(WorkRecord {
62 work_id,
63 title: trim_owned(&row.original_title),
64 pub_year,
65 pub_month,
66 })?;
67 self.n_recs += 1;
68 Ok(())
69 }
70
71 fn finish(self) -> Result<usize> {
72 self.writer.finish()?;
73 Ok(self.n_recs)
74 }
75}