bookdata/marc/
record.rs

//! MARC record representation.
//!
//! The code in this module supports records in the MARC format. It can be
//! used for processing both [bibliographic][] and [name authority][] records.
//!
//! [bibliographic]: https://www.loc.gov/marc/bibliographic/
//! [name authority]: https://www.loc.gov/marc/authority/
8use serde::{Deserialize, Serialize};
9use std::convert::TryFrom;
10use std::fmt;
11use std::str::FromStr;
12
13use thiserror::Error;
14
15// use crate::arrow::types::ArrowTypeWrapper;
16
17/// An indicator or subfield code.
18#[derive(Debug, Deserialize, Serialize, Clone, Copy, Default, PartialEq, Eq, Hash)]
19#[repr(transparent)]
20#[serde(transparent)]
21pub struct Code {
22    value: u8,
23}
24
25/// Error parsing a code
26#[derive(Error, Debug)]
27pub enum ParseCodeError {
28    /// Error parsing a character
29    #[error("failed to parse character: {0}")]
30    CharError(#[from] std::char::ParseCharError),
31    /// Error converting to valid bounds
32    #[error("character not an ASCII character: {0}")]
33    IntError(#[from] std::num::TryFromIntError),
34}
35
36/// A MARC record.
37#[derive(Debug, Clone)]
38pub struct MARCRecord {
39    pub leader: String,
40    pub control: Vec<ControlField>,
41    pub fields: Vec<Field>,
42}
43
/// A control field (00X) in a MARC record.
#[derive(Debug, Clone, Default)]
pub struct ControlField {
    /// Numeric field tag (e.g. `1` for field 001, `8` for field 008).
    pub tag: i8,
    /// The field's content, stored as-is (accessors trim or index into it).
    pub content: String,
}
50
51/// A field in a MARC record.
52#[derive(Debug, Clone, Default)]
53pub struct Field {
54    pub tag: i16,
55    pub ind1: Code,
56    pub ind2: Code,
57    pub subfields: Vec<Subfield>,
58}
59
60/// A subfield in a MARC record.
61#[derive(Debug, Clone)]
62pub struct Subfield {
63    pub code: Code,
64    pub content: String,
65}
66
67impl MARCRecord {
68    /// Get the MARC control number.
69    pub fn marc_control<'a>(&'a self) -> Option<&'a str> {
70        for cf in &self.control {
71            if cf.tag == 1 {
72                return Some(cf.content.trim());
73            }
74        }
75
76        None
77    }
78
79    /// Get the LCCN.
80    pub fn lccn<'a>(&'a self) -> Option<&'a str> {
81        for df in &self.fields {
82            if df.tag == 10 {
83                for sf in &df.subfields {
84                    if sf.code == 'a' {
85                        return Some(sf.content.trim());
86                    }
87                }
88            }
89        }
90
91        None
92    }
93
94    /// Get the record status.
95    pub fn rec_status(&self) -> Option<u8> {
96        if self.leader.len() > 5 {
97            Some(self.leader.as_bytes()[5])
98        } else {
99            None
100        }
101    }
102
103    /// Get the record type.
104    pub fn rec_type(&self) -> Option<u8> {
105        if self.leader.len() > 6 {
106            Some(self.leader.as_bytes()[6])
107        } else {
108            None
109        }
110    }
111
112    /// Get the record bibliographic level.
113    pub fn rec_bib_level(&self) -> Option<u8> {
114        if self.leader.len() > 7 {
115            Some(self.leader.as_bytes()[7])
116        } else {
117            None
118        }
119    }
120
121    /// Get the government publication code
122    pub fn gov_pub_code(&self) -> Option<u8> {
123        for cf in &self.control {
124            if cf.tag == 8 {
125                if cf.content.len() > 28 {
126                    return Some(cf.content.as_bytes()[28]);
127                } else {
128                    return None;
129                }
130            }
131        }
132
133        None
134    }
135
136    /// Query whether this record is a book.
137    ///
138    /// A record is a book if it meets the following:
139    /// - MARC type a or t
140    /// - Not a government document
141    pub fn is_book(&self) -> bool {
142        let ty = self.rec_type().unwrap_or_default();
143        if ty == b'a' || ty == b't' {
144            match self.gov_pub_code() {
145                None | Some(b' ') | Some(b'|') => true,
146                _ => false, // government document
147            }
148        } else {
149            false
150        }
151    }
152}
153
154impl From<u8> for Code {
155    #[inline]
156    fn from(value: u8) -> Code {
157        Code { value }
158    }
159}
160
161impl From<char> for Code {
162    #[inline]
163    fn from(mut value: char) -> Code {
164        assert!(value.is_ascii(), "value must be ASCII");
165        // unify blanks
166        if value == '|' {
167            value = ' ';
168        }
169        Code { value: value as u8 }
170    }
171}
172
173impl From<&Code> for char {
174    #[inline]
175    fn from(c: &Code) -> char {
176        c.value as char
177    }
178}
179
180impl From<Code> for char {
181    #[inline]
182    fn from(c: Code) -> char {
183        c.value as char
184    }
185}
186
187impl From<&Code> for u8 {
188    #[inline]
189    fn from(c: &Code) -> u8 {
190        c.value
191    }
192}
193
194impl From<Code> for u8 {
195    #[inline]
196    fn from(c: Code) -> u8 {
197        c.value
198    }
199}
200
201impl PartialEq<char> for Code {
202    #[inline]
203    fn eq(&self, other: &char) -> bool {
204        let c: char = self.into();
205        c == *other
206    }
207}
208
209impl FromStr for Code {
210    type Err = ParseCodeError;
211
212    fn from_str(s: &str) -> Result<Code, Self::Err> {
213        let c = char::from_str(s)?;
214        let v: u8 = u8::try_from(c as u32)?;
215        Ok(Code { value: v })
216    }
217}
218
219impl fmt::Display for Code {
220    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221        if self.value == 0 {
222            f.write_str("∅")
223        } else {
224            (self.value as char).fmt(f)
225        }
226    }
227}