bookdata/cleaning/
strings.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
//! Utilities for cleaning strings.
use std::iter::FromIterator;

use std::borrow::Cow;
use unicode_normalization::*;

/// Convert a string to Unicode Normalization Form D (canonical decomposition).
///
/// The input is first run through the NFD quick check.  When that check
/// definitively reports the text as already normalized, the original slice is
/// returned borrowed and nothing is allocated; for any other answer the
/// decomposed form is collected into a freshly allocated `String`.
pub fn norm_unicode<'a>(s: &'a str) -> Cow<'a, str> {
    match is_nfd_quick(s.chars()) {
        // Already in NFD: hand the caller's slice straight back.
        IsNormalized::Yes => Cow::Borrowed(s),
        // Anything short of a definite "yes" gets fully decomposed.
        _ => Cow::Owned(String::from_iter(s.nfd())),
    }
}

/// Normalizing the empty string yields the empty string.
#[test]
fn test_nu_empty() {
    let normalized = norm_unicode("");
    assert_eq!(&*normalized, "");
}

/// Plain ASCII text comes back unchanged.
#[test]
fn test_nu_basic() {
    let input = "foo";
    let normalized = norm_unicode(input);
    assert_eq!(&*normalized, "foo");
}

/// An a-with-diaeresis is expected to come out as `a` followed by the
/// combining diaeresis U+0308.
#[test]
fn test_nu_metal() {
    let decomposed = norm_unicode("metäl");
    assert_eq!(&*decomposed, "meta\u{0308}l");
}