1use lazy_static::lazy_static;
14use regex::RegexSet;
15
16use crate::util::unicode::NONSPACING_MARK;
17
/// A single ISBN extracted from a string, with any tags that followed it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ISBN {
    /// The cleaned ISBN text (see `clean_isbn_chars`).
    pub text: String,
    /// Tags taken from bracketed groups after the ISBN, e.g. `pbk.` or `set`.
    pub tags: Vec<String>,
}
24
/// Outcome of trying to parse ISBNs out of a string.
#[derive(Debug, PartialEq)]
pub enum ParseResult {
    /// At least one ISBN was parsed; the `String` is the input left over after the ISBNs.
    Valid(Vec<ISBN>, String),
    /// No ISBN was parsed and the input matched one of the ignore patterns; holds the input.
    Ignored(String),
    /// No ISBN was parsed and no ignore pattern matched, or the parser failed; holds the input.
    Unmatched(String),
}
35
/// Regular expressions for strings that contain no ISBN but are not worth
/// reporting as unmatched.
///
/// The `'static` lifetimes are implied for `static` items, so they are elided.
static IGNORES: &[&str] = &[
    // Optional `$`, then digits / dots / commas / spaces, an optional 1-4 character
    // letter-or-`*` suffix, and an optional parenthesized note (looks like a price or amount).
    r"^[$]?[[:digit:]., ]+(?:[a-zA-Z*]{1,4})?(\s+\(.*?\))?$",
    // Two or more digit/dot groups separated by slashes.
    r"^[[:digit:].]+(/[[:digit:].]+)+$",
    // `<letter>-<letter>-` followed by at least eight digits.
    r"^[A-Z]-[A-Z]-\d{8,}",
    // Empty or whitespace-only input.
    r"^\s*$",
];
44
lazy_static! {
    // Compiled set of the `IGNORES` patterns, built on first access.
    // The `unwrap` panics at that point if any pattern fails to compile.
    static ref IGNORE_RE: RegexSet = RegexSet::new(IGNORES).unwrap();
}
48
peg::parser! {
    // PEG grammar that pulls ISBN-like strings, and the bracketed tags that follow
    // them, out of a free-text string.
    grammar isbn_parser() for str {
        // One whitespace character (space, newline, carriage return or tab).
        // Wrapped in `quiet!` so it is left out of "expected ..." parse-error reports.
        rule space() = quiet!{[' ' | '\n' | '\r' | '\t']}

        // Material allowed before an ISBN: an optional `;` or `.`, any amount of
        // whitespace, then an optional prefix.
        rule lead() =
            [';' | '.']? space()* prefix()?
        // Prefixes skipped before an ISBN: a single lowercase letter followed by
        // whitespace, parenthesized digits followed by whitespace, a `*`, or the
        // literal `ISBN` followed by whitespace.
        rule prefix()
            = ['a'..='z'] space()+
            / "(" ['0'..='9']+ ")" space()+
            / "*"
            / "ISBN" space()+

        // One tag: a run of characters up to the next `:`, `)` or `]`, with
        // surrounding whitespace trimmed.
        rule single_tag() -> String = s:$([^ ':' | ')' | ']']+) { s.trim().to_string() }

        // A bracketed group of colon-separated tags. Either `[`/`(` opens and either
        // `]`/`)` closes; the two are not required to be the same kind of bracket.
        rule tags() -> Vec<String>
            = space()* ['[' | '('] tags:(single_tag() ** ":") [']' | ')'] { tags }

        // Separator between ISBNs: whitespace followed by at most one of `;`, `:`, `/`, `.`.
        // It can match the empty string.
        rule tail_skip() = space()* [';' | ':' | '/' | '.']?

        // A character allowed inside the main run of an ISBN: an ASCII digit, a hyphen,
        // a capital `O` (later rewritten to `0` by `clean_isbn_chars`), or any character
        // contained in `NONSPACING_MARK`.
        rule digit_char() -> char
            = mc:['0'..='9' | '-' | 'O'] { mc }
            / mc:[c if NONSPACING_MARK.contains(c)] { mc }

        // Junk tolerated in the middle of an ISBN: a run of ASCII letters, or a single
        // space or `+`.
        rule inner_junk() = ['a'..='z' | 'A'..='Z']+ / [' ' | '+']

        // The text of one ISBN, returned after `clean_isbn_chars`.
        // First alternative: at least 8 `digit_char`s with an optional trailing `X`/`x`.
        // Second alternative (tried only if the first fails): 1-5 digits, one piece of
        // `inner_junk`, then at least 4 digits or hyphens.
        rule isbn_text() -> String
            = s:$(digit_char()*<8,> ['X' | 'x']?) { clean_isbn_chars(s) }
            / s:$(['0'..='9']*<1,5> inner_junk() ['0'..='9' | '-']*<4,>) { clean_isbn_chars(s) }

        // One ISBN entry: optional lead-in, the ISBN text, then zero or more tag groups
        // whose tags are flattened into a single list.
        rule isbn() -> ISBN
            = lead() i:isbn_text() tags:tags()* { ISBN {
                text: i,
                tags: tags.into_iter().flatten().collect()
            }}

        // Entry point: zero or more ISBN entries separated by `tail_skip`, one final
        // `tail_skip`, and then the rest of the input captured verbatim as the tail.
        pub rule parse_isbns() -> (Vec<ISBN>, String)
            = v:(isbn() ** tail_skip()) tail_skip() t:$([_]*) { (v, t.into()) }
    }
}
90
/// Crude ISBN cleanup: reduce a string to the characters an ISBN can contain.
///
/// ASCII digits and `X` are kept, `x` is upper-cased to `X`, a capital `O` is
/// rewritten to `0`, and every other character (hyphens, spaces, letters,
/// non-ASCII characters) is dropped.
///
/// Returns the cleaned string; it is empty if no usable characters were found.
pub fn clean_isbn_chars(isbn: &str) -> String {
    // The result is never longer than the input, so one allocation suffices.
    let mut cleaned = String::with_capacity(isbn.len());
    cleaned.extend(isbn.chars().filter_map(|c| match c {
        '0'..='9' | 'X' => Some(c),
        'x' => Some('X'),
        'O' => Some('0'),
        _ => None,
    }));
    cleaned
}
113
/// Crude ASIN cleanup: keep only ASCII letters and digits, upper-casing the letters.
///
/// Every other character (punctuation, whitespace, non-ASCII characters) is dropped.
///
/// Returns the cleaned string; it is empty if no usable characters were found.
pub fn clean_asin_chars(isbn: &str) -> String {
    // The result is never longer than the input, so one allocation suffices.
    let mut cleaned = String::with_capacity(isbn.len());
    cleaned.extend(
        isbn.chars()
            .filter(char::is_ascii_alphanumeric)
            .map(|c| c.to_ascii_uppercase()),
    );
    cleaned
}
129
130pub fn parse_isbn_string(s: &str) -> ParseResult {
132 if let Ok((isbns, tail)) = isbn_parser::parse_isbns(s) {
135 if isbns.is_empty() {
136 if IGNORE_RE.is_match(s) {
137 ParseResult::Ignored(s.to_owned())
138 } else {
139 ParseResult::Unmatched(s.to_owned())
140 }
141 } else {
142 ParseResult::Valid(isbns, tail)
143 }
144 } else {
145 ParseResult::Unmatched(s.to_owned())
146 }
147}
148
/// The empty string holds no ISBN and is reported as ignored.
#[test]
fn test_parse_empty() {
    let expected = ParseResult::Ignored(String::new());
    assert_eq!(parse_isbn_string(""), expected);
}
154
/// Whitespace-only input holds no ISBN and is reported as ignored.
#[test]
fn test_parse_ws() {
    let blank = " ";
    let expected = ParseResult::Ignored(blank.to_owned());
    assert_eq!(parse_isbn_string(blank), expected);
}
160
/// A bare ISBN ending in `X` parses to itself, with no tags and no tail.
#[test]
fn test_parse_isbn() {
    let input = "349224010X";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, input);
            assert_eq!(first.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
176
/// A trailing space and colon after the ISBN are consumed, leaving no tail.
#[test]
fn test_parse_isbn_trail() {
    let input = "349224010X :";
    let expected = "349224010X";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
193
/// A combining mark embedded in the digits is accepted and stripped from the result.
#[test]
fn test_parse_isbn_caron() {
    let input = "349̌224010X";
    let expected = "349224010X";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
210
/// Hyphens inside an ISBN are accepted and removed from the cleaned text.
#[test]
fn test_parse_hyphen_isbn() {
    let input = "978-03-2948-9391";
    let expected = "9780329489391";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
227
/// A space after the leading digits is treated as inner junk, giving a single ISBN.
#[test]
fn test_parse_space_isbn() {
    let input = "978 032948-9391";
    let expected = "9780329489391";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
244
/// A parenthesized word after the ISBN is captured as its only tag.
#[test]
fn test_parse_isbn_tag() {
    let input = "34922401038 (set)";
    let expected = "34922401038";
    let expected_tag = "set";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags, vec![expected_tag]);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
262
/// A tag in square brackets is captured the same way as a parenthesized one.
#[test]
fn test_parse_isbn_square_tag() {
    let input = "34922401038 [set]";
    let expected = "34922401038";
    let expected_tag = "set";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags, vec![expected_tag]);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
280
/// Colon-separated entries in one bracket group become separate, trimmed tags.
#[test]
fn test_parse_isbn_multi_tag_sep() {
    let input = "34922401038 (set : alk. paper)";
    let expected = "34922401038";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags, vec!["set", "alk. paper"]);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
297
/// Several bracket groups after one ISBN are flattened into a single tag list.
#[test]
fn test_parse_isbn_tags() {
    let input = "34922401038 (pbk.) (set)";
    let expected = "34922401038";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags, vec!["pbk.", "set"]);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
314
/// A single lowercase letter and space before the ISBN are skipped as a prefix.
#[test]
fn test_parse_isbn_leader() {
    let input = "a 970238408138";
    let expected = "970238408138";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 1);
            let first = &found[0];
            assert_eq!(first.text, expected);
            assert_eq!(first.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
331
/// Two ISBNs separated only by whitespace are both returned, in input order.
#[test]
fn test_parse_two_isbns_ws() {
    let input = "970238408138 30148100103";
    let expected_first = "970238408138";
    let expected_second = "30148100103";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 2);
            let first = &found[0];
            assert_eq!(first.text, expected_first);
            assert_eq!(first.tags.len(), 0);
            let second = &found[1];
            assert_eq!(second.text, expected_second);
            assert_eq!(second.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
351
/// A semicolon separator and an `ISBN` prefix before the second ISBN are both skipped.
#[test]
fn test_parse_two_isbns_semi() {
    let input = "970238408138; ISBN 30148100103";
    let expected_first = "970238408138";
    let expected_second = "30148100103";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 2);
            let first = &found[0];
            assert_eq!(first.text, expected_first);
            assert_eq!(first.tags.len(), 0);
            let second = &found[1];
            assert_eq!(second.text, expected_second);
            assert_eq!(second.tags.len(), 0);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
371
/// Two ISBNs separated by `. ISBN `, where only the second one carries a tag.
#[test]
fn test_parse_two_isbns_real() {
    let input = "8719359022. ISBN 8719359004 (pbk.)";
    let expected_first = "8719359022";
    let expected_second = "8719359004";

    match parse_isbn_string(input) {
        ParseResult::Valid(found, rest) => {
            assert_eq!(found.len(), 2);
            let first = &found[0];
            assert_eq!(first.text, expected_first);
            assert_eq!(first.tags.len(), 0);
            let second = &found[1];
            assert_eq!(second.text, expected_second);
            assert_eq!(second.tags, vec!["pbk."]);
            assert_eq!(rest, "");
        }
        other => panic!("bad parse: {:?}", other),
    }
}
392
/// A stray letter inside the digits is dropped; the trailing ` :` does not block the parse.
#[test]
pub fn test_parse_isbn_junk_colon() {
    let src = "95l3512401 :";
    let expected = "953512401";

    match parse_isbn_string(src) {
        ParseResult::Valid(found, _rest) => {
            assert_eq!(found.len(), 1);
            assert_eq!(&found[0].text, expected);
        }
        other => panic!("failed to parse {}: {:?}", src, other),
    }
}
405
/// A capital letter `O` typed in place of a zero is accepted and rewritten to `0`.
#[test]
pub fn test_parse_isbn_oh() {
    let src = "O882970208 (pbk.)";
    let expected = "0882970208";

    match parse_isbn_string(src) {
        ParseResult::Valid(found, _rest) => {
            assert_eq!(found.len(), 1);
            assert_eq!(&found[0].text, expected);
        }
        other => panic!("failed to parse {}: {:?}", src, other),
    }
}
418}