use crate::ingestion::db::Insertion; use anyhow::{anyhow, Context}; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; use csv::StringRecord; use entity::expenditure::ActiveModel; use entity::transaction; use num_traits::FromPrimitive; use sea_orm::prelude::Decimal; use sea_orm::IntoActiveModel; use serde_json::Value; use std::hash::Hash; #[allow(dead_code)] pub(crate) mod headings { pub const TRANSACTION_ID: usize = 0; pub const DATE: usize = 1; pub const TIME: usize = 2; pub const TYPE: usize = 3; pub const NAME: usize = 4; pub const EMOJI: usize = 5; pub const CATEGORY: usize = 6; pub const AMOUNT: usize = 7; pub const CURRENCY: usize = 8; pub const LOCAL_AMOUNT: usize = 9; pub const LOCAL_CURRENCY: usize = 10; pub const NOTES_AND_TAGS: usize = 11; pub const ADDRESS: usize = 12; pub const RECEIPT: usize = 13; pub const DESCRIPTION: usize = 14; pub const CATEGORY_SPLIT: usize = 15; } #[derive(Debug, Eq, PartialEq, Hash, Clone)] pub struct MonzoRow { pub category_split: Option, pub primary_category: String, pub total_amount: Decimal, pub receipt: Option, pub notes: Option, pub emoji: Option, pub description: Option, pub transaction_type: String, pub title: Option, pub timestamp: NaiveDateTime, pub transaction_id: String, } impl MonzoRow { fn parse_section(monzo_transaction_id: &str, section: &str) -> anyhow::Result { let mut components = section.split(':'); let category: String = components .next() .context("Missing Missing category")? .to_string(); let amount = components .next() .context("Missing amount")? .parse::()?; Ok(entity::expenditure::Model { transaction_id: monzo_transaction_id.to_string(), category, amount, } .into_active_model()) } /// Compute a hash of this row, returning the number as an i64 to be used as a unique constraint /// in the database. pub fn compute_hash(&self) -> i64 { use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; let mut hasher = DefaultHasher::new(); self.hash(&mut hasher); hasher.finish() as i64 } pub fn into_insertion(self) -> Result { let identity_hash = self.compute_hash(); let expenditures: Vec<_> = match &self.category_split { Some(split) if !split.is_empty() => split .split(',') .map(|section| Self::parse_section(&self.transaction_id, section)) .collect::, anyhow::Error>>()?, _ => vec![entity::expenditure::Model { category: self.primary_category.clone(), amount: self.total_amount, transaction_id: self.transaction_id.clone(), } .into_active_model()], }; Ok(Insertion { transaction: transaction::Model { id: self.transaction_id, transaction_type: self.transaction_type, timestamp: self.timestamp, title: self.title, emoji: self.emoji, notes: self.notes, receipt: self.receipt, total_amount: self.total_amount, description: self.description, identity_hash: Some(identity_hash), } .into_active_model(), contained_expenditures: expenditures, identity_hash, }) } } fn json_opt(value: &Value) -> Option { match value { Value::String(string) if string.is_empty() => None, Value::String(string) => Some(string.to_string()), _ => None, } } fn json_required_str(value: &Value, label: &str) -> anyhow::Result { match value { Value::String(string) if string.is_empty() => Err(anyhow!("{} is empty", label)), Value::String(string) => Ok(string.to_string()), _ => Err(anyhow!("{} is not a string", label)), } } fn parse_timestamp(date: &str, time: &str) -> anyhow::Result { let date = NaiveDate::parse_from_str(date, "%Y-%m-%d")?; let time = NaiveTime::parse_from_str(time, "%H:%M:%S")?; Ok(date.and_time(time)) } pub fn from_json_row(row: Vec) -> anyhow::Result { let date = DateTime::parse_from_rfc3339(row[headings::DATE].as_str().context("No date")?) .context("Failed to parse date")?; let time = DateTime::parse_from_rfc3339(row[headings::TIME].as_str().context("No time")?) .context("Failed to parse date")? .time(); let timestamp = date.date_naive().and_time(time); let total_amount = Decimal::from_f64(row[headings::AMOUNT].as_f64().context("No amount")?) .context("Failed to parse date")?; Ok(MonzoRow { transaction_id: json_required_str(&row[headings::TRANSACTION_ID], "Transaction ID")?, title: json_opt(&row[headings::NAME]), transaction_type: json_required_str(&row[headings::TYPE], "Transaction type")?, description: json_opt(&row[headings::DESCRIPTION]), emoji: json_opt(&row[headings::EMOJI]), notes: json_opt(&row[headings::NOTES_AND_TAGS]), receipt: json_opt(&row[headings::RECEIPT]), primary_category: json_required_str(&row[headings::CATEGORY], "Primary Category")?, category_split: json_opt(&row[headings::CATEGORY_SPLIT]), total_amount, timestamp, }) } #[test] fn test_json() { let json = include_str!("../../fixtures/transactions.json"); let csv = include_str!("../../fixtures/transactions.csv"); let json: Vec> = serde_json::from_str(json).unwrap(); let mut csv_reader = csv::Reader::from_reader(csv.as_bytes()); let json_rows = json .iter() .map(|row| from_json_row(row.clone())) .collect::, anyhow::Error>>() .unwrap(); let csv_rows = csv_reader .records() .map(|record| from_csv_row(record.unwrap())) .collect::, anyhow::Error>>() .unwrap(); assert_eq!(csv_rows.len(), json_rows.len(), "Different number of rows"); for (i, (json_row, csv_row)) in json_rows.iter().zip(csv_rows.iter()).enumerate() { assert_eq!(json_row, csv_row, "Row {} is different", i); assert_eq!( json_row.compute_hash(), csv_row.compute_hash(), "Row {} hash are different", i ); } } fn csv_opt(s: &str) -> Option { match s { "" => None, v => Some(v.to_string()), } } pub fn from_csv_row(row: StringRecord) -> anyhow::Result { let date = NaiveDate::parse_from_str(&row[headings::DATE], "%d/%m/%Y") .context("Failed to parse date from csv")?; let time = NaiveTime::parse_from_str(&row[headings::TIME], "%H:%M:%S") .context("Failed to parse time from csv")?; let timestamp = NaiveDateTime::new(date, time); Ok(MonzoRow { timestamp, transaction_id: row[headings::TRANSACTION_ID].to_string(), title: csv_opt(&row[headings::NAME]), transaction_type: row[headings::TYPE].to_string(), description: csv_opt(&row[headings::DESCRIPTION]), emoji: csv_opt(&row[headings::EMOJI]), notes: csv_opt(&row[headings::NOTES_AND_TAGS]), receipt: csv_opt(&row[headings::RECEIPT]), total_amount: row[headings::AMOUNT].parse::()?, category_split: csv_opt(&row[headings::CATEGORY_SPLIT]), primary_category: row[headings::CATEGORY].to_string(), }) }