Move transaction whittling out of the database
All checks were successful
Build and Publish Docker Container / build (push) Successful in 9m56s
parent 3df05b2d9c
commit 4bb9f2813d
@@ -6,24 +6,29 @@ pub struct Migration;
 #[async_trait::async_trait]
 impl MigrationTrait for Migration {
     async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
-        manager.alter_table(
-            TableAlterStatement::new()
-                .table(Transaction::Table)
-                .add_column(
-                    ColumnDef::new(Transaction::IdentityHash)
-                        .big_integer()
-                        .unique_key(),
-                ).to_owned()
-        ).await
+        manager
+            .alter_table(
+                TableAlterStatement::new()
+                    .table(Transaction::Table)
+                    .add_column(
+                        ColumnDef::new(Transaction::IdentityHash)
+                            .big_integer()
+                            .unique_key(),
+                    )
+                    .to_owned(),
+            )
+            .await
     }
 
     async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
-        manager.alter_table(
-            TableAlterStatement::new()
-                .table(Transaction::Table)
-                .drop_column(Transaction::IdentityHash)
-                .to_owned()
-        ).await
+        manager
+            .alter_table(
+                TableAlterStatement::new()
+                    .table(Transaction::Table)
+                    .drop_column(Transaction::IdentityHash)
+                    .to_owned(),
+            )
+            .await
     }
 }
 
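The migration hunk above is a pure formatting change: the `identity_hash` column (a BIGINT with a unique key) and its `drop_column` counterpart are unchanged. The `Transaction` identifier enum it references is defined elsewhere in the migration crate and is not part of this diff; with sea-orm-migration it would typically be a `DeriveIden` enum shaped roughly like the sketch below (the variant list is an assumption). The remaining hunks are in the ingestion `db` and `ingestion_logic` modules.

// Sketch only: assumed shape of the Transaction Iden enum used by the migration above.
use sea_orm_migration::prelude::*;

#[derive(DeriveIden)]
enum Transaction {
    Table,        // resolves to the "transaction" table name
    IdentityHash, // resolves to the "identity_hash" column
}
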
@@ -1,51 +1,17 @@
 use crate::error::AppError;
+use crate::ingestion::ingestion_logic::MonzoRow;
 use anyhow::anyhow;
 use entity::{expenditure, transaction};
-use sea_orm::sea_query::{OnConflict, PostgresQueryBuilder};
-use sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, Iterable, TransactionTrait};
+use sea_orm::sea_query::OnConflict;
 use sea_orm::{
-    ConnectionTrait, DatabaseBackend, DatabaseTransaction, DbErr, QueryFilter, QueryTrait,
-    Statement,
+    ColumnTrait, DatabaseConnection, EntityTrait, Iterable, QuerySelect, TransactionTrait,
 };
-use crate::ingestion::ingestion_logic::MonzoRow;
+use sea_orm::{ConnectionTrait, DatabaseTransaction, QueryFilter};
 
 pub struct Insertion {
     pub transaction: transaction::ActiveModel,
     pub contained_expenditures: Vec<expenditure::ActiveModel>,
-}
-
-async fn update_expenditures(
-    tx: &DatabaseTransaction,
-    insertions: &[Insertion],
-) -> Result<(), DbErr> {
-    // Expenditures can change as we re-categorise them, so we delete all the old ones and
-    // insert an entirely new set to ensure we don't end up leaving old ones around.
-    expenditure::Entity::delete_many()
-        .filter(
-            expenditure::Column::TransactionId
-                .is_in(insertions.iter().map(|i| i.transaction.id.as_ref())),
-        )
-        .exec(tx)
-        .await?;
-
-    expenditure::Entity::insert_many(
-        insertions
-            .iter()
-            .flat_map(|i| &i.contained_expenditures)
-            .cloned(),
-    )
-    .on_conflict(
-        OnConflict::columns(vec![
-            expenditure::Column::TransactionId,
-            expenditure::Column::Category,
-        ])
-        .update_columns(expenditure::Column::iter())
-        .to_owned(),
-    )
-    .exec(tx)
-    .await?;
-
-    Ok(())
+    pub identity_hash: i64,
 }
 
 // Note while this is more efficient in db calls, it does bind together the entire group.
@@ -63,8 +29,11 @@ pub async fn insert(
     for insertions in insertions.chunks(400) {
         let tx = db.begin().await?;
-        let inserted_transaction_ids = update_transactions(insertions, &tx).await?;
-        update_expenditures(&tx, &insertions).await?;
+        let (new_or_updated_insertions, inserted_transaction_ids) =
+            whittle_insertions(insertions, &tx).await?;
 
+        update_transactions(&tx, &new_or_updated_insertions).await?;
+        update_expenditures(&tx, &new_or_updated_insertions, &inserted_transaction_ids).await?;
         tx.commit().await?;
 
         // We wait until the transaction is committed before adding the new transaction ids to the
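In the loop above, each chunk of 400 insertions now goes through three steps inside a single database transaction: `whittle_insertions` decides what still needs writing, `update_transactions` and `update_expenditures` apply it, and the transaction commits. Stripped to its shape, the chunk-per-transaction pattern is sketched below; the chunk size of 400 comes from the diff, and the per-chunk work is elided.

// Sketch of the chunk-per-transaction pattern only, not the project's insert() function.
use sea_orm::{DatabaseConnection, DbErr, TransactionTrait};

async fn process_in_chunks<T>(db: &DatabaseConnection, items: &[T]) -> Result<(), DbErr> {
    for chunk in items.chunks(400) {
        let tx = db.begin().await?;
        // ... whittle `chunk`, upsert transactions, replace expenditures ...
        let _ = chunk; // placeholder so the sketch compiles
        tx.commit().await?;
    }
    Ok(())
}
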
@@ -78,34 +47,87 @@ pub async fn insert(
     Ok(new_transaction_ids)
 }
 
-async fn update_transactions(
-    insertions: &[Insertion],
-    tx: &DatabaseTransaction,
-) -> Result<Vec<String>, AppError> {
-    let insert =
-        transaction::Entity::insert_many(insertions.iter().map(|i| &i.transaction).cloned())
-            .on_conflict(
-                OnConflict::columns([transaction::Column::Id, transaction::Column::IdentityHash])
-                    .update_columns(transaction::Column::iter())
-                    .to_owned(),
-            )
-            .into_query()
-            .returning_col(transaction::Column::Id)
-            .build(PostgresQueryBuilder);
-
-    let inserted_transaction_ids = tx
-        .query_all(Statement::from_sql_and_values(
-            DatabaseBackend::Postgres,
-            insert.0,
-            insert.1,
-        ))
-        .await?
-        .iter()
-        .map(|r| r.try_get_by("id"))
-        .collect::<Result<Vec<String>, _>>()?;
-    Ok(inserted_transaction_ids)
+async fn update_expenditures(
+    tx: &DatabaseTransaction,
+    new_or_updated_insertions: &[&Insertion],
+    inserted_transaction_ids: &[String],
+) -> Result<(), AppError> {
+    // Expenditures can change as we re-categorise them, so we delete all the old ones and
+    // insert an entirely new set to ensure we don't end up leaving old ones around.
+    expenditure::Entity::delete_many()
+        .filter(expenditure::Column::TransactionId.is_in(inserted_transaction_ids))
+        .exec(tx)
+        .await?;
+
+    expenditure::Entity::insert_many(
+        new_or_updated_insertions
+            .into_iter()
+            .flat_map(|i| &i.contained_expenditures)
+            .cloned(),
+    )
+    .on_conflict(
+        OnConflict::columns(vec![
+            expenditure::Column::TransactionId,
+            expenditure::Column::Category,
+        ])
+        .update_columns(expenditure::Column::iter())
+        .to_owned(),
+    )
+    .exec(tx)
+    .await?;
+    Ok(())
+}
+
+async fn update_transactions(
+    tx: &DatabaseTransaction,
+    new_or_updated_insertions: &[&Insertion],
+) -> Result<(), AppError> {
+    let transactions = new_or_updated_insertions
+        .iter()
+        .map(|i| &i.transaction)
+        .cloned();
+
+    transaction::Entity::insert_many(transactions)
+        .on_conflict(
+            OnConflict::column(transaction::Column::Id)
+                .update_columns(transaction::Column::iter())
+                .to_owned(),
+        )
+        .exec(tx)
+        .await?;
+    Ok(())
+}
+
+async fn whittle_insertions<'a>(
+    insertions: &'a [Insertion],
+    tx: &DatabaseTransaction,
+) -> Result<(Vec<&'a Insertion>, Vec<String>), AppError> {
+    let existing_hashes = transaction::Entity::find()
+        .select_only()
+        .columns([transaction::Column::IdentityHash])
+        .filter(transaction::Column::IdentityHash.is_not_null())
+        .into_tuple::<(i64,)>()
+        .all(tx)
+        .await?;
+
+    // We will only update those where the hash is different to avoid unnecessary updates and
+    // notifications.
+    let new_or_updated_insertions = insertions
+        .into_iter()
+        .filter(|i| {
+            let hash = i.identity_hash;
+            !existing_hashes
+                .iter()
+                .any(|(existing_hash,)| *existing_hash == hash)
+        })
+        .collect::<Vec<_>>();
+
+    let inserted_transaction_ids = new_or_updated_insertions
+        .iter()
+        .map(|i| i.transaction.id.clone().unwrap())
+        .collect::<Vec<_>>();
+
+    Ok((new_or_updated_insertions, inserted_transaction_ids))
 }
 
 async fn notify_new_transactions(
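`whittle_insertions` is the heart of the commit: rather than sending every row to Postgres and reading back the affected ids from the removed `ON CONFLICT ... RETURNING id` raw query, it loads the existing `identity_hash` values and keeps only insertions whose hash is not already present, so unchanged rows cause neither an upsert nor a notification. The membership test above is a linear scan per insertion; the same whittling idea with a `HashSet` lookup is sketched below using stand-in types, not the project's models.

use std::collections::HashSet;

// Stand-in for Insertion, reduced to the fields the whittle needs.
struct Row {
    id: String,
    identity_hash: i64,
}

// Keep only rows whose identity hash is not already stored, returning the survivors and their ids.
fn whittle<'a>(rows: &'a [Row], existing_hashes: &[i64]) -> (Vec<&'a Row>, Vec<String>) {
    let existing: HashSet<i64> = existing_hashes.iter().copied().collect();
    let kept: Vec<&Row> = rows
        .iter()
        .filter(|r| !existing.contains(&r.identity_hash))
        .collect();
    let ids = kept.iter().map(|r| r.id.clone()).collect();
    (kept, ids)
}

fn main() {
    let rows = vec![
        Row { id: "tx_1".into(), identity_hash: 11 },
        Row { id: "tx_2".into(), identity_hash: 22 },
    ];
    // Hash 11 is already in the database, so only tx_2 survives the whittle.
    let (kept, ids) = whittle(&rows, &[11]);
    assert_eq!(kept.len(), 1);
    assert_eq!(ids, vec!["tx_2".to_string()]);
}
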
@@ -121,16 +143,23 @@ async fn notify_new_transactions(
 }
 
 mod tests {
-    use anyhow::Error;
-    use sea_orm::{DatabaseConnection};
-    use sqlx::{PgPool};
-    use sqlx::postgres::PgListener;
-    use testcontainers::ContainerAsync;
-    use migration::MigratorTrait;
-    use testcontainers::runners::AsyncRunner;
     use super::notify_new_transactions;
+    use anyhow::Error;
+    use migration::MigratorTrait;
+    use sea_orm::DatabaseConnection;
+    use sqlx::postgres::PgListener;
+    use sqlx::PgPool;
+    use testcontainers::runners::AsyncRunner;
+    use testcontainers::ContainerAsync;
 
-    async fn initialise() -> Result<(ContainerAsync<testcontainers_modules::postgres::Postgres>, DatabaseConnection, PgPool), Error> {
+    async fn initialise() -> Result<
+        (
+            ContainerAsync<testcontainers_modules::postgres::Postgres>,
+            DatabaseConnection,
+            PgPool,
+        ),
+        Error,
+    > {
         let container = testcontainers_modules::postgres::Postgres::default()
             .start()
             .await?;
@@ -144,8 +173,7 @@ mod tests {
         let db: DatabaseConnection = sea_orm::Database::connect(connection_string).await?;
         migration::Migrator::up(&db, None).await?;
 
-        let pool = PgPool::connect(connection_string)
-            .await?;
+        let pool = PgPool::connect(connection_string).await?;
 
         Ok((container, db, pool))
     }
@@ -156,17 +184,22 @@ mod tests {
         let mut listener = PgListener::connect_with(&pool).await?;
         listener.listen("monzo_new_transactions").await?;
 
-        let ids = vec!["test1".to_string(), "test2".to_string(), "test3".to_string()];
+        let ids = vec![
+            "test1".to_string(),
+            "test2".to_string(),
+            "test3".to_string(),
+        ];
 
-        notify_new_transactions(
-            &db,
-            &ids,
-        ).await?;
+        notify_new_transactions(&db, &ids).await?;
 
         let notification = listener.recv().await?;
         let payload = notification.payload();
         println!("Payload: {}", payload);
-        assert_eq!(serde_json::from_str::<Vec<String>>(&payload)?, ids, "Payloads do not match");
+        assert_eq!(
+            serde_json::from_str::<Vec<String>>(&payload)?,
+            ids,
+            "Payloads do not match"
+        );
 
         Ok(())
     }
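`notify_new_transactions` itself is not changed by this commit and its body is not part of the diff; the test above only pins its observable behaviour, a NOTIFY on the `monzo_new_transactions` channel whose payload is the ids serialised as a JSON array. A minimal implementation consistent with that test could look like the sketch below; the channel name and payload format come from the test, everything else is an assumption. The remaining hunks are in the `ingestion_logic` module that builds `Insertion`s from Monzo rows.

// Sketch only: one way to produce the NOTIFY the test listens for, not the project's actual code.
use sea_orm::{ConnectionTrait, DatabaseBackend, DatabaseConnection, Statement};

async fn notify_new_transactions(db: &DatabaseConnection, ids: &[String]) -> Result<(), anyhow::Error> {
    let payload = serde_json::to_string(ids)?;
    db.execute(Statement::from_sql_and_values(
        DatabaseBackend::Postgres,
        "SELECT pg_notify('monzo_new_transactions', $1)",
        [payload.into()],
    ))
    .await?;
    Ok(())
}
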
@@ -1,4 +1,3 @@
-use std::hash::Hash;
 use crate::ingestion::db::Insertion;
 use anyhow::{anyhow, Context};
 use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
@@ -9,6 +8,7 @@ use num_traits::FromPrimitive;
 use sea_orm::prelude::Decimal;
 use sea_orm::IntoActiveModel;
 use serde_json::Value;
+use std::hash::Hash;
 
 #[allow(dead_code)]
 mod headings {
@@ -62,7 +62,8 @@ impl MonzoRow {
             transaction_id: monzo_transaction_id.to_string(),
             category,
             amount,
-        }.into_active_model())
+        }
+        .into_active_model())
     }
 
     /// Compute a hash of this row, returning the number as an i64 to be used as a unique constraint
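`compute_hash` sits outside the changed hunks, so only its doc comment is visible here. Given the relocated `use std::hash::Hash;` import and the comment's contract of an `i64` usable as a unique constraint, one plausible shape is to feed the row through a standard hasher and reinterpret the `u64` as `i64` so it fits the new BIGINT column. The sketch below assumes a `Hash` derive and a cut-down field list, neither of which the diff shows.

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Sketch only: a stand-in row type; the real MonzoRow has more fields.
#[derive(Hash)]
struct MonzoRowSketch {
    transaction_id: String,
    description: String,
}

impl MonzoRowSketch {
    /// Hash the row and reinterpret the u64 as i64 so it fits a Postgres BIGINT column.
    fn compute_hash(&self) -> i64 {
        let mut hasher = DefaultHasher::new();
        self.hash(&mut hasher);
        hasher.finish() as i64
    }
}
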
@@ -77,7 +78,9 @@ impl MonzoRow {
     }
 
     pub fn into_insertion(self) -> Result<Insertion, anyhow::Error> {
-        let expenditures: Vec<_> = match self.category_split {
+        let identity_hash = self.compute_hash();
+
+        let expenditures: Vec<_> = match &self.category_split {
             Some(split) if !split.is_empty() => split
                 .split(',')
                 .map(|section| Self::parse_section(&self.transaction_id, section))
@@ -88,7 +91,7 @@ impl MonzoRow {
                 amount: self.total_amount,
                 transaction_id: self.transaction_id.clone(),
             }
             .into_active_model()],
         };
 
         Ok(Insertion {
@@ -102,10 +105,12 @@ impl MonzoRow {
                 receipt: self.receipt,
                 total_amount: self.total_amount,
                 description: self.description,
-                identity_hash: Some(self.compute_hash()),
-            }.into_active_model(),
+                identity_hash: Some(identity_hash),
+            }
+            .into_active_model(),
 
             contained_expenditures: expenditures,
+            identity_hash,
         })
     }
 }