232 lines
7.7 KiB
Rust
232 lines
7.7 KiB
Rust
use crate::ingestion::db::Insertion;
|
|
use anyhow::{anyhow, Context};
|
|
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
|
|
use csv::StringRecord;
|
|
use entity::expenditure::ActiveModel;
|
|
use entity::transaction;
|
|
use num_traits::FromPrimitive;
|
|
use sea_orm::prelude::Decimal;
|
|
use sea_orm::IntoActiveModel;
|
|
use serde_json::Value;
|
|
use std::hash::Hash;
|
|
|
|
/// Zero-based column positions of a Monzo transaction export row.
///
/// The row parsers in this file index into a JSON array or a CSV record with
/// these constants instead of using bare numbers.
// Some positions (for example `CURRENCY` and `ADDRESS`) are not read by the row
// parsers visible in this file, which is why `dead_code` is allowed here.
#[allow(dead_code)]
pub(crate) mod headings {
    /// Column holding the transaction identifier.
    pub const TRANSACTION_ID: usize = 0;
    /// Column holding the transaction date.
    pub const DATE: usize = 1;
    /// Column holding the transaction time.
    pub const TIME: usize = 2;
    /// Column holding the transaction type.
    pub const TYPE: usize = 3;
    /// Column holding the transaction name.
    pub const NAME: usize = 4;
    /// Column holding the emoji.
    pub const EMOJI: usize = 5;
    /// Column holding the primary category.
    pub const CATEGORY: usize = 6;
    /// Column holding the amount.
    pub const AMOUNT: usize = 7;
    /// Column holding the currency.
    pub const CURRENCY: usize = 8;
    /// Column holding the local amount.
    pub const LOCAL_AMOUNT: usize = 9;
    /// Column holding the local currency.
    pub const LOCAL_CURRENCY: usize = 10;
    /// Column holding notes and tags.
    pub const NOTES_AND_TAGS: usize = 11;
    /// Column holding the address.
    pub const ADDRESS: usize = 12;
    /// Column holding the receipt.
    pub const RECEIPT: usize = 13;
    /// Column holding the description.
    pub const DESCRIPTION: usize = 14;
    /// Column holding the category split.
    pub const CATEGORY_SPLIT: usize = 15;
}
|
|
|
|
#[derive(Debug, Eq, PartialEq, Hash, Clone)]
|
|
pub struct MonzoRow {
|
|
pub category_split: Option<String>,
|
|
pub primary_category: String,
|
|
pub total_amount: Decimal,
|
|
pub receipt: Option<String>,
|
|
pub notes: Option<String>,
|
|
pub emoji: Option<String>,
|
|
pub description: Option<String>,
|
|
pub transaction_type: String,
|
|
pub title: Option<String>,
|
|
pub timestamp: NaiveDateTime,
|
|
pub transaction_id: String,
|
|
}
|
|
|
|
impl MonzoRow {
|
|
fn parse_section(monzo_transaction_id: &str, section: &str) -> anyhow::Result<ActiveModel> {
|
|
let mut components = section.split(':');
|
|
let category: String = components
|
|
.next()
|
|
.context("Missing Missing category")?
|
|
.to_string();
|
|
|
|
let amount = components
|
|
.next()
|
|
.context("Missing amount")?
|
|
.parse::<Decimal>()?;
|
|
|
|
Ok(entity::expenditure::Model {
|
|
transaction_id: monzo_transaction_id.to_string(),
|
|
category,
|
|
amount,
|
|
}
|
|
.into_active_model())
|
|
}
|
|
|
|
/// Compute a hash of this row, returning the number as an i64 to be used as a unique constraint
|
|
/// in the database.
|
|
pub fn compute_hash(&self) -> i64 {
|
|
use std::collections::hash_map::DefaultHasher;
|
|
use std::hash::Hasher;
|
|
|
|
let mut hasher = DefaultHasher::new();
|
|
self.hash(&mut hasher);
|
|
hasher.finish() as i64
|
|
}
|
|
|
|
pub fn into_insertion(self) -> Result<Insertion, anyhow::Error> {
|
|
let identity_hash = self.compute_hash();
|
|
|
|
let expenditures: Vec<_> = match &self.category_split {
|
|
Some(split) if !split.is_empty() => split
|
|
.split(',')
|
|
.map(|section| Self::parse_section(&self.transaction_id, section))
|
|
.collect::<Result<Vec<_>, anyhow::Error>>()?,
|
|
|
|
_ => vec![entity::expenditure::Model {
|
|
category: self.primary_category.clone(),
|
|
amount: self.total_amount,
|
|
transaction_id: self.transaction_id.clone(),
|
|
}
|
|
.into_active_model()],
|
|
};
|
|
|
|
Ok(Insertion {
|
|
transaction: transaction::Model {
|
|
id: self.transaction_id,
|
|
transaction_type: self.transaction_type,
|
|
timestamp: self.timestamp,
|
|
title: self.title,
|
|
emoji: self.emoji,
|
|
notes: self.notes,
|
|
receipt: self.receipt,
|
|
total_amount: self.total_amount,
|
|
description: self.description,
|
|
identity_hash: Some(identity_hash),
|
|
}
|
|
.into_active_model(),
|
|
|
|
contained_expenditures: expenditures,
|
|
identity_hash,
|
|
})
|
|
}
|
|
}
|
|
|
|
fn json_opt(value: &Value) -> Option<String> {
|
|
match value {
|
|
Value::String(string) if string.is_empty() => None,
|
|
Value::String(string) => Some(string.to_string()),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn json_required_str(value: &Value, label: &str) -> anyhow::Result<String> {
|
|
match value {
|
|
Value::String(string) if string.is_empty() => Err(anyhow!("{} is empty", label)),
|
|
Value::String(string) => Ok(string.to_string()),
|
|
_ => Err(anyhow!("{} is not a string", label)),
|
|
}
|
|
}
|
|
|
|
fn parse_timestamp(date: &str, time: &str) -> anyhow::Result<NaiveDateTime> {
|
|
let date = NaiveDate::parse_from_str(date, "%Y-%m-%d")?;
|
|
let time = NaiveTime::parse_from_str(time, "%H:%M:%S")?;
|
|
|
|
Ok(date.and_time(time))
|
|
}
|
|
|
|
pub fn from_json_row(row: Vec<Value>) -> anyhow::Result<MonzoRow> {
|
|
let date = DateTime::parse_from_rfc3339(row[headings::DATE].as_str().context("No date")?)
|
|
.context("Failed to parse date")?;
|
|
|
|
let time = DateTime::parse_from_rfc3339(row[headings::TIME].as_str().context("No time")?)
|
|
.context("Failed to parse date")?
|
|
.time();
|
|
|
|
let timestamp = date.date_naive().and_time(time);
|
|
|
|
let total_amount = Decimal::from_f64(row[headings::AMOUNT].as_f64().context("No amount")?)
|
|
.context("Failed to parse date")?;
|
|
|
|
Ok(MonzoRow {
|
|
transaction_id: json_required_str(&row[headings::TRANSACTION_ID], "Transaction ID")?,
|
|
title: json_opt(&row[headings::NAME]),
|
|
transaction_type: json_required_str(&row[headings::TYPE], "Transaction type")?,
|
|
description: json_opt(&row[headings::DESCRIPTION]),
|
|
emoji: json_opt(&row[headings::EMOJI]),
|
|
notes: json_opt(&row[headings::NOTES_AND_TAGS]),
|
|
receipt: json_opt(&row[headings::RECEIPT]),
|
|
primary_category: json_required_str(&row[headings::CATEGORY], "Primary Category")?,
|
|
category_split: json_opt(&row[headings::CATEGORY_SPLIT]),
|
|
total_amount,
|
|
timestamp,
|
|
})
|
|
}
|
|
|
|
/// Parses the JSON and CSV transaction fixtures and checks that both yield the
/// same rows, with the same identity hashes, in the same order.
#[test]
fn test_json() {
    let json_fixture = include_str!("../../fixtures/transactions.json");
    let csv_fixture = include_str!("../../fixtures/transactions.csv");

    let parsed_json: Vec<Vec<Value>> = serde_json::from_str(json_fixture).unwrap();
    let mut csv_reader = csv::Reader::from_reader(csv_fixture.as_bytes());

    // The parsed JSON is not needed afterwards, so each row is moved into the parser.
    let json_rows = parsed_json
        .into_iter()
        .map(from_json_row)
        .collect::<anyhow::Result<Vec<_>>>()
        .unwrap();

    let csv_rows = csv_reader
        .records()
        .map(|record| from_csv_row(record.unwrap()))
        .collect::<anyhow::Result<Vec<_>>>()
        .unwrap();

    assert_eq!(csv_rows.len(), json_rows.len(), "Different number of rows");

    for (i, (json_row, csv_row)) in json_rows.iter().zip(&csv_rows).enumerate() {
        assert_eq!(json_row, csv_row, "Row {} is different", i);

        let json_hash = json_row.compute_hash();
        let csv_hash = csv_row.compute_hash();
        assert_eq!(json_hash, csv_hash, "Row {} hash are different", i);
    }
}
|
|
|
|
/// Reads an optional text field from a CSV record.
///
/// Returns `None` for an empty field and an owned copy of the text otherwise.
fn csv_opt(s: &str) -> Option<String> {
    if s.is_empty() {
        None
    } else {
        Some(s.to_owned())
    }
}
|
|
|
|
pub fn from_csv_row(row: StringRecord) -> anyhow::Result<MonzoRow> {
|
|
let date = NaiveDate::parse_from_str(&row[headings::DATE], "%d/%m/%Y")
|
|
.context("Failed to parse date from csv")?;
|
|
|
|
let time = NaiveTime::parse_from_str(&row[headings::TIME], "%H:%M:%S")
|
|
.context("Failed to parse time from csv")?;
|
|
|
|
let timestamp = NaiveDateTime::new(date, time);
|
|
|
|
Ok(MonzoRow {
|
|
timestamp,
|
|
transaction_id: row[headings::TRANSACTION_ID].to_string(),
|
|
title: csv_opt(&row[headings::NAME]),
|
|
transaction_type: row[headings::TYPE].to_string(),
|
|
description: csv_opt(&row[headings::DESCRIPTION]),
|
|
emoji: csv_opt(&row[headings::EMOJI]),
|
|
notes: csv_opt(&row[headings::NOTES_AND_TAGS]),
|
|
receipt: csv_opt(&row[headings::RECEIPT]),
|
|
total_amount: row[headings::AMOUNT].parse::<Decimal>()?,
|
|
category_split: csv_opt(&row[headings::CATEGORY_SPLIT]),
|
|
primary_category: row[headings::CATEGORY].to_string(),
|
|
})
|
|
}
|