monzo-ingestion/src/ingestion/ingestion_logic.rs
2024-08-09 09:43:10 +01:00

232 lines
7.7 KiB
Rust

use crate::ingestion::db::Insertion;
use anyhow::{anyhow, Context};
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
use csv::StringRecord;
use entity::expenditure::ActiveModel;
use entity::transaction;
use num_traits::FromPrimitive;
use sea_orm::prelude::Decimal;
use sea_orm::IntoActiveModel;
use serde_json::Value;
use std::hash::Hash;
// Not every column is consumed by the parsers in this file, so the unused
// indices would otherwise trigger dead-code warnings.
#[allow(dead_code)]
/// Zero-based column positions used to index into a single export row
/// (either a CSV record or a JSON array of cells).
pub(crate) mod headings {
    /// Unique identifier of the transaction.
    pub const TRANSACTION_ID: usize = 0;
    /// Date component of the transaction timestamp.
    pub const DATE: usize = 1;
    /// Time-of-day component of the transaction timestamp.
    pub const TIME: usize = 2;
    /// Transaction type.
    pub const TYPE: usize = 3;
    /// Name shown for the transaction (stored as the row title).
    pub const NAME: usize = 4;
    /// Emoji attached to the transaction.
    pub const EMOJI: usize = 5;
    /// Primary category of the transaction.
    pub const CATEGORY: usize = 6;
    /// Total amount of the transaction.
    pub const AMOUNT: usize = 7;
    /// Currency of `AMOUNT`.
    pub const CURRENCY: usize = 8;
    /// Amount in the local currency.
    pub const LOCAL_AMOUNT: usize = 9;
    /// Local currency code.
    pub const LOCAL_CURRENCY: usize = 10;
    /// Free-form notes and tags.
    pub const NOTES_AND_TAGS: usize = 11;
    /// Address column.
    pub const ADDRESS: usize = 12;
    /// Receipt column.
    pub const RECEIPT: usize = 13;
    /// Description column.
    pub const DESCRIPTION: usize = 14;
    /// Optional split of the amount across categories.
    pub const CATEGORY_SPLIT: usize = 15;
}
/// One parsed row of a transaction export, independent of whether it came
/// from the CSV or the JSON representation.
///
/// The derived `Hash` hashes the fields in declaration order and is what
/// `compute_hash` feeds into the stored identity hash, so reordering, adding
/// or removing fields changes the hash computed for every row.
#[derive(Debug, Eq, PartialEq, Hash, Clone)]
pub struct MonzoRow {
/// Raw category split: comma-separated `category:amount` sections, or
/// `None` when the column was empty.
pub category_split: Option<String>,
/// Category used for the single expenditure when there is no category split.
pub primary_category: String,
/// Total amount of the transaction.
pub total_amount: Decimal,
/// Contents of the receipt column, `None` when empty.
pub receipt: Option<String>,
/// Contents of the notes-and-tags column, `None` when empty.
pub notes: Option<String>,
/// Contents of the emoji column, `None` when empty.
pub emoji: Option<String>,
/// Contents of the description column, `None` when empty.
pub description: Option<String>,
/// Contents of the type column.
pub transaction_type: String,
/// Contents of the name column, `None` when empty.
pub title: Option<String>,
/// Date and time columns combined into one timezone-less timestamp.
pub timestamp: NaiveDateTime,
/// Contents of the transaction ID column.
pub transaction_id: String,
}
impl MonzoRow {
    /// Parses one `category:amount` section of a category split into an
    /// expenditure belonging to `monzo_transaction_id`.
    ///
    /// Anything after a second `:` is ignored.
    ///
    /// # Errors
    ///
    /// Fails when the section has no `:amount` part or when the amount is not
    /// a valid decimal.
    fn parse_section(monzo_transaction_id: &str, section: &str) -> anyhow::Result<ActiveModel> {
        let mut components = section.split(':');
        let category = components
            .next()
            .context("Missing category")?
            .to_string();
        let amount = components
            .next()
            .context("Missing amount")?
            .parse::<Decimal>()
            // Name the offending section so a bad split can be traced to its row.
            .with_context(|| format!("Invalid amount in category split section '{}'", section))?;
        Ok(entity::expenditure::Model {
            transaction_id: monzo_transaction_id.to_string(),
            category,
            amount,
        }
        .into_active_model())
    }

    /// Compute a hash of this row, returning the number as an i64 to be used as a unique constraint
    /// in the database.
    ///
    /// NOTE(review): the standard library documents that `DefaultHasher`'s
    /// algorithm is unspecified and may change between Rust releases, so
    /// hashes stored by a binary built with one toolchain may not match those
    /// of another. Confirm whether that matters for the stored values before
    /// swapping the hasher, since doing so changes every computed hash.
    pub fn compute_hash(&self) -> i64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hasher;
        let mut hasher = DefaultHasher::new();
        self.hash(&mut hasher);
        // Deliberate bit-for-bit reinterpretation of the u64 digest as i64.
        hasher.finish() as i64
    }

    /// Converts this row into an [`Insertion`]: the transaction model, its
    /// expenditures and the row's identity hash.
    ///
    /// A non-empty category split produces one expenditure per
    /// comma-separated section; otherwise a single expenditure is created
    /// from the primary category and the total amount.
    ///
    /// # Errors
    ///
    /// Fails when any section of the category split cannot be parsed.
    pub fn into_insertion(self) -> Result<Insertion, anyhow::Error> {
        // Hash before any field is moved out of `self`.
        let identity_hash = self.compute_hash();
        let expenditures: Vec<_> = match &self.category_split {
            Some(split) if !split.is_empty() => split
                .split(',')
                .map(|section| Self::parse_section(&self.transaction_id, section))
                .collect::<Result<Vec<_>, anyhow::Error>>()?,
            _ => vec![entity::expenditure::Model {
                category: self.primary_category.clone(),
                amount: self.total_amount,
                transaction_id: self.transaction_id.clone(),
            }
            .into_active_model()],
        };
        Ok(Insertion {
            transaction: transaction::Model {
                id: self.transaction_id,
                transaction_type: self.transaction_type,
                timestamp: self.timestamp,
                title: self.title,
                emoji: self.emoji,
                notes: self.notes,
                receipt: self.receipt,
                total_amount: self.total_amount,
                description: self.description,
                identity_hash: Some(identity_hash),
            }
            .into_active_model(),
            contained_expenditures: expenditures,
            identity_hash,
        })
    }
}
fn json_opt(value: &Value) -> Option<String> {
match value {
Value::String(string) if string.is_empty() => None,
Value::String(string) => Some(string.to_string()),
_ => None,
}
}
/// Extracts a mandatory, non-empty string from a JSON cell.
///
/// `label` names the field in the error message.
///
/// # Errors
///
/// Fails when the value is not a JSON string or is the empty string.
fn json_required_str(value: &Value, label: &str) -> anyhow::Result<String> {
    match value.as_str() {
        None => Err(anyhow!("{} is not a string", label)),
        Some("") => Err(anyhow!("{} is empty", label)),
        Some(text) => Ok(text.to_owned()),
    }
}
fn parse_timestamp(date: &str, time: &str) -> anyhow::Result<NaiveDateTime> {
let date = NaiveDate::parse_from_str(date, "%Y-%m-%d")?;
let time = NaiveTime::parse_from_str(time, "%H:%M:%S")?;
Ok(date.and_time(time))
}
pub fn from_json_row(row: Vec<Value>) -> anyhow::Result<MonzoRow> {
let date = DateTime::parse_from_rfc3339(row[headings::DATE].as_str().context("No date")?)
.context("Failed to parse date")?;
let time = DateTime::parse_from_rfc3339(row[headings::TIME].as_str().context("No time")?)
.context("Failed to parse date")?
.time();
let timestamp = date.date_naive().and_time(time);
let total_amount = Decimal::from_f64(row[headings::AMOUNT].as_f64().context("No amount")?)
.context("Failed to parse date")?;
Ok(MonzoRow {
transaction_id: json_required_str(&row[headings::TRANSACTION_ID], "Transaction ID")?,
title: json_opt(&row[headings::NAME]),
transaction_type: json_required_str(&row[headings::TYPE], "Transaction type")?,
description: json_opt(&row[headings::DESCRIPTION]),
emoji: json_opt(&row[headings::EMOJI]),
notes: json_opt(&row[headings::NOTES_AND_TAGS]),
receipt: json_opt(&row[headings::RECEIPT]),
primary_category: json_required_str(&row[headings::CATEGORY], "Primary Category")?,
category_split: json_opt(&row[headings::CATEGORY_SPLIT]),
total_amount,
timestamp,
})
}
/// Checks that the JSON and CSV fixtures parse to the same rows, in the same
/// order, and that matching rows produce the same identity hash.
#[test]
fn test_json() {
    let json_fixture = include_str!("../../fixtures/transactions.json");
    let csv_fixture = include_str!("../../fixtures/transactions.csv");

    let raw_rows: Vec<Vec<Value>> = serde_json::from_str(json_fixture).unwrap();
    let json_rows: Vec<MonzoRow> = raw_rows
        .into_iter()
        .map(from_json_row)
        .collect::<anyhow::Result<_>>()
        .unwrap();

    let mut reader = csv::Reader::from_reader(csv_fixture.as_bytes());
    let csv_rows: Vec<MonzoRow> = reader
        .records()
        .map(|record| from_csv_row(record.unwrap()))
        .collect::<anyhow::Result<_>>()
        .unwrap();

    assert_eq!(csv_rows.len(), json_rows.len(), "Different number of rows");

    for (index, (from_json, from_csv)) in json_rows.iter().zip(&csv_rows).enumerate() {
        assert_eq!(from_json, from_csv, "Row {} is different", index);
        assert_eq!(
            from_json.compute_hash(),
            from_csv.compute_hash(),
            "Row {} hash are different",
            index
        );
    }
}
/// Maps an empty CSV field to `None` and any other field to an owned copy of
/// its text.
fn csv_opt(s: &str) -> Option<String> {
    if s.is_empty() {
        None
    } else {
        Some(s.to_owned())
    }
}
pub fn from_csv_row(row: StringRecord) -> anyhow::Result<MonzoRow> {
let date = NaiveDate::parse_from_str(&row[headings::DATE], "%d/%m/%Y")
.context("Failed to parse date from csv")?;
let time = NaiveTime::parse_from_str(&row[headings::TIME], "%H:%M:%S")
.context("Failed to parse time from csv")?;
let timestamp = NaiveDateTime::new(date, time);
Ok(MonzoRow {
timestamp,
transaction_id: row[headings::TRANSACTION_ID].to_string(),
title: csv_opt(&row[headings::NAME]),
transaction_type: row[headings::TYPE].to_string(),
description: csv_opt(&row[headings::DESCRIPTION]),
emoji: csv_opt(&row[headings::EMOJI]),
notes: csv_opt(&row[headings::NOTES_AND_TAGS]),
receipt: csv_opt(&row[headings::RECEIPT]),
total_amount: row[headings::AMOUNT].parse::<Decimal>()?,
category_split: csv_opt(&row[headings::CATEGORY_SPLIT]),
primary_category: row[headings::CATEGORY].to_string(),
})
}