bookdata/openlib/
edition.rs1use friendly::scalar;
3use parquet_derive::ParquetRecordWriter;
4
5use crate::arrow::*;
6use crate::cleaning::isbns::clean_asin_chars;
7use crate::cleaning::isbns::clean_isbn_chars;
8use crate::prelude::*;
9
10use super::key::parse_ol_key;
11use super::key::KS_WORK;
12pub use super::source::OLEditionRecord;
13use super::source::Row;
14use super::subject::SubjectEntry;
15
16#[derive(ParquetRecordWriter)]
18pub struct EditionRec {
19 pub id: u32,
20 pub key: String,
21 pub title: Option<String>,
22}
23
24#[derive(ParquetRecordWriter)]
26pub struct LinkRec {
27 pub edition: u32,
28 pub work: u32,
29}
30
31#[derive(ParquetRecordWriter)]
33pub struct ISBNrec {
34 pub edition: u32,
35 pub isbn: String,
36}
37
38#[derive(ParquetRecordWriter)]
40pub struct EditionAuthorRec {
41 pub edition: u32,
42 pub pos: i16,
43 pub author: u32,
44}
45
46#[derive(ParquetRecordWriter)]
48pub struct EditionSubjectRec {
49 pub id: u32,
50 pub subj_type: u8,
51 pub subject: String,
52}
53
54impl From<SubjectEntry> for EditionSubjectRec {
55 fn from(value: SubjectEntry) -> Self {
56 EditionSubjectRec {
57 id: value.entity,
58 subj_type: value.subj_type.into(),
59 subject: value.subject,
60 }
61 }
62}
63
64pub struct EditionProcessor {
68 last_id: u32,
69 rec_writer: TableWriter<EditionRec>,
70 link_writer: TableWriter<LinkRec>,
71 isbn_writer: TableWriter<ISBNrec>,
72 author_writer: TableWriter<EditionAuthorRec>,
73 subject_writer: TableWriter<EditionSubjectRec>,
74}
75
76impl EditionProcessor {
77 pub fn new() -> Result<EditionProcessor> {
78 Ok(EditionProcessor {
79 last_id: 0,
80 rec_writer: TableWriter::open("editions.parquet")?,
81 link_writer: TableWriter::open("edition-works.parquet")?,
82 isbn_writer: TableWriter::open("edition-isbns.parquet")?,
83 author_writer: TableWriter::open("edition-authors.parquet")?,
84 subject_writer: TableWriter::open("edition-subjects.parquet")?,
85 })
86 }
87
88 fn save_isbns(
89 &mut self,
90 edition: u32,
91 isbns: Vec<String>,
92 clean: fn(&str) -> String,
93 ) -> Result<()> {
94 for isbn in isbns {
95 let isbn = clean(&isbn);
96 if isbn.len() >= 8 {
98 self.isbn_writer.write_object(ISBNrec { edition, isbn })?;
99 }
100 }
101
102 Ok(())
103 }
104}
105
106impl ObjectWriter<Row<OLEditionRecord>> for EditionProcessor {
107 fn write_object(&mut self, row: Row<OLEditionRecord>) -> Result<()> {
108 self.last_id += 1;
109 let id = self.last_id;
110
111 self.rec_writer.write_object(EditionRec {
112 id,
113 key: row.key.clone(),
114 title: row.record.title.clone(),
115 })?;
116
117 self.save_isbns(id, row.record.isbn_10, clean_isbn_chars)?;
118 self.save_isbns(id, row.record.isbn_13, clean_isbn_chars)?;
119 self.save_isbns(id, row.record.asin, clean_asin_chars)?;
120
121 for work in row.record.works {
122 let key = work.key;
123 let work = parse_ol_key(&key, KS_WORK)?;
124 self.link_writer
125 .write_object(LinkRec { edition: id, work })?;
126 }
127
128 for pos in 0..row.record.authors.len() {
129 let author = row.record.authors[pos].id()?;
130 if let Some(aid) = author {
131 let pos = pos as i16;
132 self.author_writer.write_object(EditionAuthorRec {
133 edition: id,
134 pos,
135 author: aid,
136 })?;
137 }
138 }
139
140 for sr in row.record.subjects.subject_records(id) {
141 self.subject_writer.write_object(sr.into())?;
142 }
143
144 Ok(())
145 }
146
147 fn finish(self) -> Result<usize> {
148 let n = self.rec_writer.finish()?;
149 info!("wrote {} edition records", scalar(n));
150 let n = self.author_writer.finish()?;
151 info!("wrote {} edition-author records", scalar(n));
152 let n = self.link_writer.finish()?;
153 info!("wrote {} edition-work records", scalar(n));
154 let n = self.isbn_writer.finish()?;
155 info!("wrote {} edition-isbn records", scalar(n));
156 let n = self.subject_writer.finish()?;
157 info!("wrote {} edition-subject records", scalar(n));
158 Ok(self.last_id as usize)
159 }
160}
161
162impl EditionProcessor {}