bookdata/io/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
use friendly::bytes;
use indicatif::ProgressBar;
use log::*;
use std::fs;
use std::io::{BufRead, BufReader, Result as IOResult};
use std::path::{Path, PathBuf};

pub mod background;
pub mod compress;
pub mod ext;
pub mod lines;
pub mod object;

pub use compress::open_gzin_progress;
pub use lines::LineProcessor;
pub use object::ObjectWriter;

/// A data-processing sink that can report the files it reads and writes.
pub trait DataSink {
    /// List the files this sink produces as output.
    fn output_files(&self) -> Vec<PathBuf>;

    /// List the auxiliary input files this sink depends on.
    ///
    /// Most sinks are also an [ObjectWriter], and their primary input is the
    /// stream of objects written to them; the file behind that stream is not
    /// reported here.  A sink that needs additional files in order to do its
    /// processing can override this method to report them.
    ///
    /// The default implementation reports no auxiliary inputs.
    fn input_files(&self) -> Vec<PathBuf> {
        vec![]
    }
}

/// Log the size of each file in a collection of paths.
///
/// Every path is looked up with [file_size] and reported at `info` level.
///
/// # Errors
///
/// Stops at the first file whose size cannot be determined and returns that
/// error; files after it are not logged.
pub fn log_file_info<P: AsRef<Path>, S: IntoIterator<Item = P>>(files: S) -> IOResult<()> {
    files.into_iter().try_for_each(|entry| -> IOResult<()> {
        let file = entry.as_ref();
        let len = file_size(file)?;
        info!("output {:?}: {}", file, bytes(len));
        Ok(())
    })
}

/// Convert a list of strings into owned [PathBuf]s.
///
/// The returned vector has one entry per input string, in the same order.
/// An empty slice yields an empty vector.
pub fn path_list(paths: &[&str]) -> Vec<PathBuf> {
    // `iter()` states the by-reference iteration explicitly (the slice is
    // only borrowed), and `PathBuf::from` is passed directly rather than
    // through a wrapping closure.
    paths.iter().map(PathBuf::from).collect()
}

/// Report the length of a file in bytes.
///
/// # Errors
///
/// Returns the error from [fs::metadata] if the file's metadata cannot be
/// read (for example, when the path does not exist).
pub fn file_size<P: AsRef<Path>>(path: P) -> IOResult<u64> {
    fs::metadata(path).map(|info| info.len())
}

/// Open a file as a buffered reader with a progress bar.
///
/// The progress bar's length is set to the size of the file and its prefix
/// to the file's name.  The file is wrapped with `pb.wrap_read` before being
/// buffered, so reads that reach the file go through the progress bar.
///
/// If `path` has no final component (e.g. `..` or `/`), the whole path,
/// lossily converted to a string, is used as the prefix instead.
///
/// # Errors
///
/// Returns any I/O error raised while opening the file or reading its
/// metadata.
pub fn open_progress(path: &Path, pb: ProgressBar) -> IOResult<impl BufRead> {
    // `file_name()` is `None` for paths ending in `..` or consisting only of
    // a root; fall back to the full path rather than panicking in a function
    // that otherwise reports failures through its `IOResult`.
    let name = path
        .file_name()
        .map(|n| n.to_string_lossy())
        .unwrap_or_else(|| path.to_string_lossy())
        .into_owned();
    let read = fs::File::open(path)?;
    pb.set_length(read.metadata()?.len());
    pb.set_prefix(name);

    let read = pb.wrap_read(read);
    Ok(BufReader::new(read))
}