From 5e7f602890af5acc8bd30c7fec41c57122e5c5ad Mon Sep 17 00:00:00 2001 From: Kieran Date: Thu, 26 Sep 2024 14:38:00 +0100 Subject: [PATCH] feat: void.cat migration binary --- Cargo.lock | 142 +++++++++++++++++++++++++++--- Cargo.toml | 19 +++- src/{ => bin}/main.rs | 29 +++--- src/bin/void_cat_migrate.rs | 171 ++++++++++++++++++++++++++++++++++++ src/filesystem.rs | 2 +- src/lib.rs | 10 +++ src/webhook.rs | 2 +- 7 files changed, 339 insertions(+), 36 deletions(-) rename src/{ => bin}/main.rs (83%) create mode 100644 src/bin/void_cat_migrate.rs create mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 08021d8..6308ccc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -86,6 +86,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" version = "1.0.86" @@ -519,6 +568,52 @@ dependencies = [ "libloading", ] +[[package]] +name = "clap" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1640,6 +1735,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.12.1" @@ -2709,6 +2810,8 @@ dependencies = [ "candle-nn", "candle-transformers", "chrono", + "clap", + "clap_derive", "config", "ffmpeg-sys-the-third", "hex", @@ -2721,6 +2824,7 @@ dependencies = [ "serde_with", "sha2", "sqlx", + "sqlx-postgres", "tokio", "ureq", "url", @@ -3129,9 +3233,9 @@ dependencies = [ [[package]] name = "sqlx" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcfa89bea9500db4a0d038513d7a060566bfc51d46d1c014847049a45cce85e8" +checksum = "93334716a037193fac19df402f8571269c84a00852f6a7066b5d2616dcd64d3e" dependencies = [ "sqlx-core", "sqlx-macros", @@ -3142,9 +3246,9 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d06e2f2bd861719b1f3f0c7dbe1d80c30bf59e76cf019f07d9014ed7eefb8e08" +checksum = "d4d8060b456358185f7d50c55d9b5066ad956956fddec42ee2e8567134a8936e" dependencies = [ "atoi", "byteorder", @@ -3178,13 +3282,14 @@ dependencies = [ "tokio-stream", "tracing", "url", + "uuid", ] [[package]] name = "sqlx-macros" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f998a9defdbd48ed005a89362bd40dd2117502f15294f61c8d47034107dbbdc" +checksum = "cac0692bcc9de3b073e8d747391827297e075c7710ff6276d9f7a1f3d58c6657" dependencies = [ "proc-macro2", "quote", @@ -3195,9 +3300,9 @@ dependencies = [ [[package]] name = "sqlx-macros-core" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d100558134176a2629d46cec0c8891ba0be8910f7896abfdb75ef4ab6f4e7ce" +checksum = "1804e8a7c7865599c9c79be146dc8a9fd8cc86935fa641d3ea58e5f0688abaa5" dependencies = [ "dotenvy", "either", @@ -3221,9 +3326,9 @@ dependencies = [ [[package]] name = "sqlx-mysql" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cac0ab331b14cb3921c62156d913e4c15b74fb6ec0f3146bd4ef6e4fb3c12" +checksum = "64bb4714269afa44aef2755150a0fc19d756fb580a67db8885608cf02f47d06a" dependencies = [ "atoi", "base64 0.22.1", @@ -3259,14 +3364,15 @@ dependencies = [ "stringprep", "thiserror", "tracing", + "uuid", "whoami", ] [[package]] name = "sqlx-postgres" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9734dbce698c67ecf67c442f768a5e90a49b2a4d61a9f1d59f73874bd4cf0710" +checksum = "6fa91a732d854c5d7726349bb4bb879bb9478993ceb764247660aee25f67c2f8" dependencies = [ "atoi", "base64 0.22.1", @@ -3298,14 +3404,15 @@ dependencies = [ "stringprep", "thiserror", "tracing", + "uuid", "whoami", ] [[package]] name = "sqlx-sqlite" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75b419c3c1b1697833dd927bdc4c6545a620bc1bbafabd44e1efbe9afcd337e" +checksum = "d5b2cf34a45953bfd3daaf3db0f7a7878ab9b7a6b91b422d24a7a9e4c857b680" dependencies = [ "atoi", "chrono", @@ -3323,6 +3430,7 @@ dependencies = [ "sqlx-core", "tracing", "url", + "uuid", ] [[package]] @@ -3850,6 +3958,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.10.0" diff --git a/Cargo.toml b/Cargo.toml index 95a4516..3a6b582 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,11 +3,23 @@ name = "route96" version = "0.2.0" edition = "2021" +[[bin]] +name = "void_cat_migrate" +required-features = ["bin-migrate"] + +[[bin]] +name = "route96" +path = "src/bin/main.rs" + +[lib] +name = "route96" + [features] default = ["nip96", "blossom"] labels = ["nip96", "dep:candle-core", "dep:candle-nn", "dep:candle-transformers"] nip96 = ["dep:ffmpeg-sys-the-third", "dep:blurhash", "dep:libc"] blossom = [] +bin-migrate = ["dep:sqlx-postgres", "dep:clap", "dep:clap_derive"] [dependencies] log = "0.4.21" @@ -21,7 +33,7 @@ serde = { version = "1.0.198", features = ["derive"] } uuid = { version = "1.8.0", features = ["v4"] } anyhow = "1.0.82" sha2 = "0.10.8" -sqlx = { version = "0.8.1", features = ["mysql", "runtime-tokio", "chrono"] } +sqlx = { version = "0.8.1", features = ["mysql", "runtime-tokio", "chrono", "uuid"] } config = { version = "0.14.0", features = ["toml"] } chrono = { version = "0.4.38", features = ["serde"] } url = "2.5.0" @@ -30,8 +42,11 @@ ureq = { version = "2.9.7", features = ["json"] } libc = { version = "0.2.153", optional = true } blurhash = { version = "0.2.1", optional = true } -ffmpeg-sys-the-third = { version = "2.0.0+ffmpeg-7.0", features = ["default"], optional = true } +ffmpeg-sys-the-third = { version = "2.0.0", features = ["default"], optional = true } candle-core = { git = "https://github.com/huggingface/candle.git", version = "^0.6.1", optional = true } candle-nn = { git = "https://github.com/huggingface/candle.git", version = "^0.6.1", optional = true } candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "^0.6.1", optional = true } +clap = { version = "4.5.18", features = ["derive"], optional = true } +sqlx-postgres = { version = "0.8.2", optional = true, features = ["chrono", "uuid"] } +clap_derive = { version = "4.5.18", optional = true } diff --git a/src/main.rs b/src/bin/main.rs similarity index 83% rename from src/main.rs rename to src/bin/main.rs index d475cfb..6fed642 100644 --- a/src/main.rs +++ b/src/bin/main.rs @@ -8,22 +8,13 @@ use rocket::data::{ByteUnit, Limits}; use rocket::routes; use rocket::shield::Shield; -use crate::cors::CORS; -use crate::db::Database; -use crate::filesystem::FileStore; -use crate::routes::{get_blob, head_blob, root}; -use crate::settings::Settings; -use crate::webhook::Webhook; - -mod auth; -mod cors; -mod db; -mod filesystem; -#[cfg(feature = "nip96")] -mod processing; -mod routes; -mod settings; -mod webhook; +use route96::cors::CORS; +use route96::db::Database; +use route96::filesystem::FileStore; +use route96::routes; +use route96::routes::{get_blob, head_blob, root}; +use route96::settings::Settings; +use route96::webhook::Webhook; #[rocket::main] async fn main() -> Result<(), Error> { @@ -70,10 +61,12 @@ async fn main() -> Result<(), Error> { .attach(Shield::new()) // disable .mount("/", routes![root, get_blob, head_blob]); - #[cfg(feature = "blossom")] { + #[cfg(feature = "blossom")] + { rocket = rocket.mount("/", routes::blossom_routes()); } - #[cfg(feature = "nip96")] { + #[cfg(feature = "nip96")] + { rocket = rocket.mount("/", routes::nip96_routes()); } if let Err(e) = rocket.launch().await { diff --git a/src/bin/void_cat_migrate.rs b/src/bin/void_cat_migrate.rs new file mode 100644 index 0000000..2c6dedf --- /dev/null +++ b/src/bin/void_cat_migrate.rs @@ -0,0 +1,171 @@ +use anyhow::Error; +use chrono::{DateTime, Utc}; +use clap::Parser; +use config::Config; +use log::{info, warn}; +use route96::db::{Database, FileUpload}; +use route96::filesystem::FileStore; +use route96::settings::Settings; +use sqlx::FromRow; +use sqlx_postgres::{PgPool, Postgres}; +use std::path::PathBuf; +use tokio::fs::File; +use uuid::Uuid; + +#[derive(Parser, Debug)] +#[command(version, about)] +struct Args { + /// Database connection string for void.cat DB + #[arg(long)] + pub database: String, + + /// Path to filestore on void.cat + #[arg(long)] + pub data_path: String, +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + pretty_env_logger::init(); + + let builder = Config::builder() + .add_source(config::File::with_name("config.toml")) + .add_source(config::Environment::with_prefix("APP")) + .build()?; + + let settings: Settings = builder.try_deserialize()?; + + let db = Database::new(&settings.database).await?; + let fs = FileStore::new(settings.clone()); + + let args: Args = Args::parse(); + + let db_void = VoidCatDb::connect(&args.database).await?; + + let mut page = 0; + loop { + let files = db_void.list_files(page).await?; + if files.len() == 0 { + break; + } + for f in files { + if let Err(e) = migrate_file(&f, &db, &fs, &args).await { + warn!("Failed to migrate file: {}, {}", &f.id, e); + } + } + page += 1; + } + Ok(()) +} + +async fn migrate_file( + f: &VoidFile, + db: &Database, + fs: &FileStore, + args: &Args, +) -> Result<(), Error> { + let pubkey_vec = hex::decode(&f.email)?; + let id_vec = hex::decode(&f.digest)?; + + // copy file + let src_path = PathBuf::new().join(&args.data_path).join(f.map_to_path()); + let dst_path = fs.map_path(&id_vec); + if src_path.exists() && !dst_path.exists() { + info!( + "Copying file: {} from {} => {}", + &f.id, + src_path.to_str().unwrap(), + dst_path.to_str().unwrap() + ); + tokio::fs::copy(src_path, dst_path).await?; + } else if dst_path.exists() { + info!("File already exists {}, continuing...", &f.id); + } else { + anyhow::bail!("Source file not found {}", src_path.to_str().unwrap()); + } + let uid = db.upsert_user(&pubkey_vec).await?; + info!("Mapped user {} => {}", &f.email, uid); + + let md: Option> = match &f.media_dimensions { + Some(s) => Some(s.split("x").collect()), + _ => None, + }; + let fu = FileUpload { + id: id_vec, + name: match &f.name { + Some(n) => n.to_string(), + None => "".to_string(), + }, + size: f.size as u64, + mime_type: f.mime_type.clone(), + created: f.uploaded, + width: match &md { + Some(s) => Some(s[0].parse::()?), + None => None, + }, + height: match &md { + Some(s) => Some(s[1].parse::()?), + None => None, + }, + blur_hash: None, + alt: f.description.clone(), + }; + db.add_file(&fu, uid).await?; + Ok(()) +} + +#[derive(FromRow)] +struct VoidFile { + #[sqlx(rename = "Id")] + pub id: Uuid, + #[sqlx(rename = "Name")] + pub name: Option, + #[sqlx(rename = "Size")] + pub size: i64, + #[sqlx(rename = "Uploaded")] + pub uploaded: DateTime, + #[sqlx(rename = "Description")] + pub description: Option, + #[sqlx(rename = "MimeType")] + pub mime_type: String, + #[sqlx(rename = "Digest")] + pub digest: String, + #[sqlx(rename = "MediaDimensions")] + pub media_dimensions: Option, + #[sqlx(rename = "Email")] + pub email: String, +} + +impl VoidFile { + fn map_to_path(&self) -> PathBuf { + let id_str = self.id.as_hyphenated().to_string(); + PathBuf::new() + .join("files-v2/") + .join(&id_str[..2]) + .join(&id_str[2..4]) + .join(&id_str) + } +} + +struct VoidCatDb { + pub pool: PgPool, +} + +impl VoidCatDb { + async fn connect(conn: &str) -> Result { + let pool = PgPool::connect(conn).await?; + Ok(Self { pool }) + } + + async fn list_files(&self, page: usize) -> Result, sqlx::Error> { + let page_size = 100; + sqlx::query_as(format!("select f.\"Id\", f.\"Name\", CAST(f.\"Size\" as BIGINT) \"Size\", f.\"Uploaded\", f.\"Description\", f.\"MimeType\", f.\"Digest\", f.\"MediaDimensions\", u.\"Email\" +from \"Files\" f, \"UserFiles\" uf, \"Users\" u +where f.\"Id\" = uf.\"FileId\" +and uf.\"UserId\" = u.\"Id\" +and u.\"AuthType\" = 4\ +offset {} limit {}", page * page_size, page_size).as_str()) + .fetch_all(&self.pool) + .await + } +} diff --git a/src/filesystem.rs b/src/filesystem.rs index afbd9a5..b3a2506 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -220,7 +220,7 @@ impl FileStore { temp_dir().join(id.to_string()) } - fn map_path(&self, id: &Vec) -> PathBuf { + pub fn map_path(&self, id: &Vec) -> PathBuf { let id = hex::encode(id); Path::new(&self.settings.storage_dir) .join(&id[0..2]) diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..73fac13 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,10 @@ + +pub mod auth; +pub mod cors; +pub mod db; +pub mod filesystem; +#[cfg(feature = "nip96")] +pub mod processing; +pub mod routes; +pub mod settings; +pub mod webhook; \ No newline at end of file diff --git a/src/webhook.rs b/src/webhook.rs index fde836f..f95265a 100644 --- a/src/webhook.rs +++ b/src/webhook.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use crate::filesystem::FileSystemResult; -pub(crate) struct Webhook { +pub struct Webhook { url: String, }