bookdata/
amazon.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
//! Structs defining Amazon data sets.
use parquet_derive::ParquetRecordWriter;
use serde::{Deserialize, Serialize};

/// A rating as described in a source CSV file.
///
/// No serde renames are applied, so the (de)serialized field names are exactly
/// the Rust field names below.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SourceRating {
    /// User identifier, as the string found in the source file.
    pub user: String,
    /// Item identifier (NOTE(review): presumably the Amazon ASIN — confirm).
    pub asin: String,
    /// Rating value.
    pub rating: f32,
    /// Time of the rating (NOTE(review): presumably Unix epoch seconds — confirm
    /// against the source data).
    pub timestamp: i64,
}

/// A review as it is described in a source JSON file.
///
/// Several fields are renamed with `#[serde(rename = ...)]` so that the Rust
/// field names can differ from the keys used in the source records.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SourceReview {
    /// User identifier; stored under the `reviewerID` key in the source.
    #[serde(rename = "reviewerID")]
    pub user: String,
    /// Item identifier (NOTE(review): presumably the Amazon ASIN — confirm).
    pub asin: String,
    /// Rating value; stored under the `overall` key in the source.
    #[serde(rename = "overall")]
    pub rating: f32,
    /// Time of the review; stored under the `unixReviewTime` key in the source
    /// (NOTE(review): presumably Unix epoch seconds — confirm).
    #[serde(rename = "unixReviewTime")]
    pub timestamp: i64,
    /// Review summary, if the record has one.
    pub summary: Option<String>,
    /// Review body, if the record has one; stored under the `reviewText` key.
    #[serde(rename = "reviewText")]
    pub text: Option<String>,
    /// The record's `verified` flag. This field is not optional and has no
    /// serde default, so it must be present in every record.
    pub verified: bool,
}

/// Structure for scanned ratings.
///
/// This data structure is serialized to `ratings.parquet` in the Amazon directories.
#[derive(Debug, Clone, PartialEq, ParquetRecordWriter, Serialize, Deserialize)]
pub struct RatingRow {
    /// Numeric user identifier. NOTE(review): [`SourceRating`] carries the user
    /// as a string; how it is mapped to this integer is not defined in this
    /// file — confirm against the scanning code.
    pub user: i32,
    /// Item identifier (NOTE(review): presumably the Amazon ASIN — confirm).
    pub asin: String,
    /// Rating value.
    pub rating: f32,
    /// Time of the rating (NOTE(review): presumably Unix epoch seconds — confirm).
    pub timestamp: i64,
}

/// Structure for scanned reviews.
///
/// This data structure is serialized to `reviews.parquet` in the Amazon directories.
#[derive(Debug, Clone, PartialEq, ParquetRecordWriter, Serialize, Deserialize)]
pub struct ReviewRow {
    /// Numeric user identifier. NOTE(review): [`SourceReview`] carries the user
    /// as a string; how it is mapped to this integer is not defined in this
    /// file — confirm against the scanning code.
    pub user: i32,
    /// Item identifier (NOTE(review): presumably the Amazon ASIN — confirm).
    pub asin: String,
    /// Rating value.
    pub rating: f32,
    /// Time of the review (NOTE(review): presumably Unix epoch seconds — confirm).
    pub timestamp: i64,
    /// Review summary. Unlike [`SourceReview::summary`] this is not optional;
    /// how a missing summary is represented is decided by the code that builds
    /// the row, which is not in this file.
    pub summary: String,
    /// Review body. Unlike [`SourceReview::text`] this is not optional; how a
    /// missing body is represented is decided by the code that builds the row,
    /// which is not in this file.
    pub text: String,
}