bookdata/goodreads/
ids.rs1use std::{collections::HashMap, fs::File};
3
4use anyhow::Result;
5use log::*;
6use polars::prelude::*;
7use serde::{Deserialize, Serialize};
8
9use crate::{
10 ids::codes::{NS_GR_BOOK, NS_GR_WORK},
11 prelude::BDPath,
12};
13
/// Map from a book ID to the link record for that book.
pub type BookLinkMap = HashMap<i32, BookLinkRecord>;

/// Path of the Parquet file from which the book link records are read.
const GR_LINK_FILE: BDPath<'static> = BDPath::new("goodreads/gr-book-link.parquet");
17
/// One book link record, associating a book with an optional work and a cluster.
#[derive(Debug, Serialize, Deserialize)]
pub struct BookLinkRecord {
    /// Identifier of the book; also used as the key in [`BookLinkMap`].
    pub book_id: i32,
    /// Identifier of the work this book belongs to, or `None` if the record has none.
    pub work_id: Option<i32>,
    /// Value of the `cluster` column for this book.
    pub cluster: i32,
}
25
26impl BookLinkRecord {
27 pub fn item_id(&self) -> i32 {
29 if let Some(w) = &self.work_id {
30 NS_GR_WORK.base() + w
31 } else {
32 NS_GR_BOOK.base() + self.book_id
33 }
34 }
35}
36
37pub fn load_id_links() -> Result<BookLinkMap> {
39 let path = GR_LINK_FILE.resolve()?;
40 let file = File::open(path)?;
41 let pqf = ParquetReader::new(file);
42 let df = pqf.finish()?;
43
44 let mut map = HashMap::with_capacity(df.height());
45
46 let c_book = df.column("book_id")?.i32()?;
47 let c_work = df.column("work_id")?.i32()?;
48 let c_cluster = df.column("cluster")?.i32()?;
49
50 for i in 0..df.height() {
51 let rec: BookLinkRecord = BookLinkRecord {
52 book_id: c_book.get(i).unwrap(),
53 work_id: c_work.get(i),
54 cluster: c_cluster.get(i).unwrap(),
55 };
56 map.insert(rec.book_id, rec);
57 }
58
59 info!("read {} book links from {}", map.len(), GR_LINK_FILE);
60 Ok(map)
61}