bookdata/cli/goodreads/
scan.rsuse crate::goodreads::*;
use crate::io::object::{ChunkWriter, ThreadObjectWriter, UnchunkWriter};
use crate::prelude::*;
use crate::util::logging::data_progress;
use serde::de::DeserializeOwned;
#[derive(clap::Subcommand, Debug)]
pub enum GRScan {
Works(ScanInput),
Books(ScanInput),
Genres(ScanInput),
Authors(ScanInput),
Interactions(ScanInput),
Reviews(ScanInput),
}
#[derive(Args, Debug)]
pub struct ScanInput {
#[arg(name = "INPUT")]
infile: PathBuf,
}
fn scan_gr<R, W>(path: &Path, proc: W) -> Result<()>
where
W: ObjectWriter<R> + DataSink + Send + Sync + 'static,
R: DeserializeOwned + Send + Sync + 'static,
{
let outs: Vec<_> = proc.output_files();
info!("reading data from {}", path.display());
let pb = data_progress(0);
let read = LineProcessor::open_gzip(path, pb.clone())?;
let proc = ChunkWriter::new(proc);
let writer = ThreadObjectWriter::wrap(proc).with_name("output").spawn();
let mut writer = UnchunkWriter::new(writer);
read.process_json(&mut writer)?;
pb.finish_and_clear();
writer.finish()?;
for out in outs {
let outf = out.as_path();
info!(
"output {} is {}",
outf.display(),
friendly::bytes(file_size(outf)?)
);
}
Ok(())
}
impl GRScan {
pub fn exec(&self) -> Result<()> {
match self {
GRScan::Works(opts) => {
info!("scanning GoodReads works");
scan_gr(&opts.infile, work::WorkWriter::open()?)?;
}
GRScan::Books(opts) => {
info!("scanning GoodReads books");
scan_gr(&opts.infile, book::BookWriter::open()?)?;
}
GRScan::Genres(opts) => {
info!("scanning GoodReads book genres");
scan_gr(&opts.infile, genres::BookGenreWriter::open()?)?;
}
GRScan::Authors(opts) => {
info!("scanning GoodReads book genres");
scan_gr(&opts.infile, author::AuthorWriter::open()?)?;
}
GRScan::Interactions(opts) => {
info!("scanning GoodReads interactions");
scan_gr(&opts.infile, interaction::IntWriter::open()?)?;
}
GRScan::Reviews(opts) => {
info!("scanning GoodReads reviews");
scan_gr(&opts.infile, review::ReviewWriter::open()?)?;
}
};
Ok(())
}
}