bookdata/cli/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
//! Command line interface for book data.
//!
//! The book data tools are implemented as a single monolithic executable (to reduce
//! compilation time and disk space in common configurations, with different tools
//! implemented as subcommands.  Each subcommand implements the [Command] trait, which
//! exposes the command line arguments and invocation.
pub mod amazon;
pub mod bx;
pub mod cluster;
pub mod cluster_books;
pub mod collect_isbns;
pub mod extract_graph;
pub mod filter_marc;
pub mod goodreads;
pub mod index_names;
pub mod kcore;
pub mod link_isbns;
pub mod openlib;
pub mod pqinfo;
pub mod scan_marc;
pub mod stats;

use anyhow::Result;
use clap::{Parser, Subcommand};
use cpu_time::ProcessTime;
use enum_dispatch::enum_dispatch;
use happylog::clap::LogOpts;
use log::*;
use paste::paste;

use crate::util::process;
use crate::util::Timer;

/// Macro to generate wrappers for subcommand enums.
///
/// This is for subcommands that only exist to contain further subcommands,
/// to make it easier to implement their wrapper classes.
macro_rules! wrap_subcommands {
    ($name:ty) => {
        paste! {
          #[derive(clap::Args, Debug)]
          pub struct [<$name Wrapper>] {
            #[command(subcommand)]
            command: $name
          }

          impl Command for [<$name Wrapper>] {
            fn exec(&self) -> Result<()> {
              self.command.exec()
            }
          }
        }
    };
}

/// Trait implemented by book data commands.
#[enum_dispatch]
pub trait Command {
    /// Run the command with options
    fn exec(&self) -> Result<()>;
}

/// Enum to collect and dispatch CLI commands.
#[enum_dispatch(Command)]
#[derive(Subcommand, Debug)]
pub enum RootCommand {
    ScanMARC(scan_marc::ScanMARC),
    FilterMARC(filter_marc::FilterMARC),
    ClusterBooks(cluster_books::ClusterBooks),
    IndexNames(index_names::IndexNames),
    ExtractGraph(extract_graph::ExtractGraph),
    CollectISBNS(collect_isbns::CollectISBNs),
    LinkISBNIds(link_isbns::LinkISBNIds),
    /// Commands for processing Amazon data.
    Amazon(AmazonCommandWrapper),
    /// Commands for processing OpenLibrary data.
    Openlib(openlib::OpenLib),
    /// Commands for processing BookCrossing data.
    BX(BXCommandWrapper),
    /// Commands for processing GoodReads data.
    Goodreads(goodreads::Goodreads),
    /// Commands for working with clusters.
    Cluster(ClusterCommandWrapper),
    Kcore(kcore::Kcore),
    PQInfo(pqinfo::PQInfo),
    IntegrationStats(stats::IntegrationStats),
}

wrap_subcommands!(AmazonCommand);
wrap_subcommands!(BXCommand);
wrap_subcommands!(ClusterCommand);

#[enum_dispatch(Command)]
#[derive(Subcommand, Debug)]
enum AmazonCommand {
    ScanRatings(amazon::ScanRatings),
    ScanReviews(amazon::ScanReviews),
    ClusterRatings(amazon::ClusterRatings),
}

#[enum_dispatch(Command)]
#[derive(Subcommand, Debug)]
enum BXCommand {
    /// Extract BX from source data and clean.
    Extract(bx::Extract),
    /// Match BX interactions with clusters.
    ClusterActions(bx::Cluster),
}

#[enum_dispatch(Command)]
#[derive(Subcommand, Debug)]
pub enum ClusterCommand {
    Hash(cluster::hash::HashCmd),
    ExtractBooks(cluster::books::ExtractBooks),
    ExtractAuthors(cluster::authors::ClusterAuthors),
    ExtractAuthorGender(cluster::author_gender::AuthorGender),
}

/// Entry point for the Book Data Tools.
///
/// This program runs the various book data tools, exposed as subcommands.
#[derive(Parser, Debug)]
#[command(name = "bookdata")]
pub struct CLI {
    #[command(flatten)]
    logging: LogOpts,

    #[command(subcommand)]
    command: RootCommand,
}

impl CLI {
    pub fn exec(self) -> Result<()> {
        self.logging.init()?;

        process::maybe_exit_early()?;

        let timer = Timer::new();

        let res = self.command.exec();
        if let Err(e) = &res {
            error!("command failed: {}", e);
            return res;
        }

        info!("work completed in {}", timer.human_elapsed());
        match ProcessTime::try_now() {
            Ok(pt) => info!("used {} CPU time", friendly::duration(pt.as_duration())),
            Err(e) => error!("error fetching CPU time: {}", e),
        };

        process::log_process_stats();
        res
    }
}