diff --git a/.gitignore b/.gitignore index 84b37fb..65849f8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /FANTOIR* +!/fantoir-datasource Cargo.lock diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml index bc0bfb3..64da1e4 100644 --- a/fantoir-datasource/Cargo.toml +++ b/fantoir-datasource/Cargo.toml @@ -1,34 +1,36 @@ [package] name = "fantoir-datasource" version = "0.1.0" edition = "2021" description = "Generates a Postgres table from FANTOIR raw file" authors = [ "Sébastien Santoro " ] license = "BSD-2-Clause" [dependencies] +chrono = "~0.4.23" lazy_static = "~1.4.0" +opendatasoft-explore-api = { version = "0.1.0", path = "../opendatasoft-explore-api" } oxrdf = "~0.1.1" sparesults = "~0.1.3" [dependencies.async-scoped] version = "~0.7.1" features = ["use-tokio"] [dependencies.clap] version = "~4.0.32" features = ["derive"] [dependencies.reqwest] version = "~0.11.13" features = ["gzip", "deflate"] [dependencies.sqlx] version = "~0.6.2" features = ["runtime-tokio-native-tls", "postgres", "chrono"] [dependencies.tokio] version = "~1.23.0" features = ["full"] diff --git a/fantoir-datasource/README.md b/fantoir-datasource/README.md index 64b4d1c..a0f37d8 100644 --- a/fantoir-datasource/README.md +++ b/fantoir-datasource/README.md @@ -1,51 +1,57 @@ ## How to use? Define your PostgreSQL connection URL in environment: ``` export DATABASE_URL="postgres://fantoir:fantoir@localhost/fantoir" ``` +## Requirements + +A PostgreSQL server, at least for import/wikidata/promote commands. + +If you wish to use the tool to fetch the FANTOIR database: `unzip` + ## Development ### Build instructions The PostgreSQL library is required to link against it. If not found, you can add the path to the LIB environment variable. 
### Prepare a test database Execute the following queries as postgres user: ``` CREATE ROLE fantoir WITH PASSWORD 'fantoir' LOGIN; CREATE DATABASE fantoir OWNER fantoir; ``` Connected as your database role, enable the pg_trgm extension to be able to generate the index for full-text search with trigrams: ``` CREATE EXTENSION pg_trgm; ``` If the extension doesn't exist, it can be included in a package named for example `postgresql-contrib`. You can then use the code with the default DATABASE_URL documented above. ### Database pitfalls The FANTOIR database uses INSEE department code, they can contain a letter, currently only for Corse (2A and 2B). That also applies when building the INSEE commune code. If a record is canceled, the cancel date can be omitted. The creation date can be omitted too. The last line of the FANTOIR database must be ignored. Wikidata uses the "code FANTOIR", matching the "code RIVOLI" documented in FANTOIR file description. This code matches the 11 first characters of a record. See also https://www.wikidata.org/wiki/Property:P3182. 
diff --git a/fantoir-datasource/src/commands/fetch/fantoir_file.rs b/fantoir-datasource/src/commands/fetch/fantoir_file.rs
new file mode 100644
index 0000000..7e6a208
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/fantoir_file.rs
@@ -0,0 +1,138 @@
+use std::cmp::Ordering;
+use std::path::Path;
+
+use opendatasoft_explore_api::schema::Attachment;
+
+use chrono::Datelike;
+use chrono::Months;
+use chrono::NaiveDate;
+
+/*   -------------------------------------------------------------
+    FANTOIR file metadata
+    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
+
+/// A FANTOIR file published as a dataset attachment.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub struct FantoirFile {
+    /// The URL of the attachment, used to download the file
+    pub url: String,
+
+    /// The month of FANTOIR file production
+    pub date: NaiveDate,
+}
+
+impl FantoirFile {
+    /// Builds the file metadata from a dataset attachment.
+    ///
+    /// Returns `None` if the production month can't be read from the attachment identifier,
+    /// expected to look like `fichier_national_fantoir_situation_novembre_2022_zip`.
+    pub fn from (attachment: &Attachment) -> Option<Self> {
+        let id_date = attachment.metas.id
+            .replace("fichier_national_fantoir_situation_", "")
+            .replace("_zip", "");
+
+        Some(Self {
+            url: attachment.href.clone(),
+            date: parse_fantoir_date(&id_date)?,
+        })
+    }
+
+    /// Gets the names the extracted file can have: `FANTOIR` followed by the month
+    /// and the year, on two digits each, for the month before the production one
+    /// and for the production month itself.
+    pub fn get_file_candidates(&self) -> Vec<String> {
+        let previous_month = self.date - Months::new(1);
+        vec![
+            // Zero-padded, so January 2023 gives FANTOIR0123 and not FANTOIR123.
+            format!("FANTOIR{:02}{:02}", previous_month.month(), previous_month.year() - 2000),
+            format!("FANTOIR{:02}{:02}", self.date.month(), self.date.year() - 2000),
+        ]
+    }
+
+    /// Determines if one of the file candidates exists in the current directory.
+    pub fn exists_locally(&self) -> bool {
+        self.get_file_candidates()
+            .iter()
+            .any(|candidate| Path::new(candidate).is_file())
+    }
+}
+
+impl PartialOrd for FantoirFile {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for FantoirFile {
+    /// Orders files by production month, so max() gives the most recent one.
+    fn cmp(&self, other: &Self) -> Ordering {
+        // The URL only breaks ties, to stay consistent with the derived PartialEq.
+        self.date.cmp(&other.date)
+            .then_with(|| self.url.cmp(&other.url))
+    }
+}
+
+/// Parses the `<month in French>_<year>` part of an attachment identifier,
+/// e.g. `novembre_2022`, as the first day of that month.
+fn parse_fantoir_date(id_date: &str) -> Option<NaiveDate> {
+    let (month, year) = id_date.split_once('_')?;
+
+    NaiveDate::from_ymd_opt(
+        year.parse().ok()?,
+        parse_french_month_long_name(month)?,
+        1
+    )
+}
+
+/// Gets the month number from its French name, written without diacritics.
+fn parse_french_month_long_name(month: &str) -> Option<u32> {
+    match month {
+        "janvier" => Some(1),
+        "fevrier" => Some(2),
+        "mars" => Some(3),
+        "avril" => Some(4),
+        "mai" => Some(5),
+        "juin" => Some(6),
+        "juillet" => Some(7),
+        "aout" => Some(8),
+        "septembre" => Some(9),
+        "octobre" => Some(10),
+        "novembre" => Some(11),
+        "decembre" => Some(12),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use chrono::NaiveDate;
+    use super::*;
+
+    #[test]
+    fn test_parse_fantoir_date() {
+        let expected = NaiveDate::from_ymd_opt(2022, 11, 1).unwrap();
+        assert_eq!(Some(expected), parse_fantoir_date("novembre_2022"));
+    }
+
+    #[test]
+    fn test_get_file_candidates() {
+        let file = FantoirFile {
+            url: "foo/fichier_national_fantoir_situation_novembre_2022_zip".to_string(),
+            date: NaiveDate::from_ymd_opt(2022, 11, 1).unwrap(),
+        };
+
+        let expected = vec!["FANTOIR1022".to_string(), "FANTOIR1122".to_string()];
+        assert_eq!(expected, file.get_file_candidates());
+    }
+
+    #[test]
+    fn test_get_file_candidates_with_single_digit_month() {
+        let file = FantoirFile {
+            url: "foo/fichier_national_fantoir_situation_janvier_2023_zip".to_string(),
+            date: NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
+        };
+
+        let expected = vec!["FANTOIR1222".to_string(), "FANTOIR0123".to_string()];
+        assert_eq!(expected, file.get_file_candidates());
+    }
+}
diff --git a/fantoir-datasource/src/commands/fetch/mod.rs b/fantoir-datasource/src/commands/fetch/mod.rs
new file mode 100644
index 0000000..4d84c81
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/mod.rs
@@ -0,0 +1,124 @@
+//! Fetch command for the fantoir-datasource tool.
+//!
+//! 
Check last version and download if needed
+
+use std::env;
+use std::path::{Path, PathBuf};
+use std::process::exit;
+
+use chrono::Utc;
+use tokio::fs::remove_file;
+use opendatasoft_explore_api::requests::ExploreApiEndPoint;
+use tokio::process::Command;
+
+use crate::commands::fetch::fantoir_file::FantoirFile;
+use crate::commands::fetch::os::is_command_available;
+use crate::services::http_client::Client as HttpClient;
+
+mod fantoir_file;
+mod os;
+
+static ENDPOINT: &str = "https://data.economie.gouv.fr/api/v2";
+static DATASET_ID: &str = "fichier-fantoir-des-voies-et-lieux-dits";
+
+/// Fetches the last FANTOIR file published on data.economie.gouv.fr to the current directory.
+///
+/// Prints `FANTOIR_FILE=<extracted file>` on success, and always terminates the process:
+/// exit code 0 on success, 12 if the file already exists and `overwrite` is false,
+/// 16 if the download fails, 32 if `unzip` isn't available, 127 if the extracted file
+/// can't be identified, the exit code of `unzip` if the extraction fails.
+pub async fn fetch (overwrite: bool) {
+    let fantoir_file = get_last_file_information().await;
+
+    let file_exists = fantoir_file.exists_locally();
+    if file_exists && !overwrite {
+        eprintln!("FANTOIR file already exists. Run with --overwrite to overwrite it.");
+        exit(12);
+    }
+
+    if !is_command_available("unzip") {
+        eprintln!("No 'unzip' utility has been found, please install it or fix PATH if needed.");
+        exit(32);
+    }
+
+    let target_path = get_fantoir_zip_path();
+    if let Err(error) = HttpClient::new(None).download(&fantoir_file.url, &target_path).await {
+        eprintln!("Can't download FANTOIR file: {:?}", error);
+        exit(16);
+    }
+
+    let exit_code = match unzip(&target_path, overwrite).await {
+        Ok(path) => {
+            println!("FANTOIR_FILE={}", &path);
+            0
+        }
+        Err(exit_code) => exit_code,
+    };
+
+    if let Err(error) = remove_file(&target_path).await {
+        eprintln!("Can't remove downloaded temporary file: {}", error);
+        eprintln!("Please delete manually {}", target_path.display());
+    }
+
+    exit(exit_code);
+}
+
+/// Determines a temporary location where to save the FANTOIR file ZIP archive
+fn get_fantoir_zip_path() -> PathBuf {
+    let filename = format!("fantoir-download-{}.zip", Utc::now().timestamp());
+
+    env::temp_dir().join(filename)
+}
+
+/// Extracts the archive in the current directory with the `unzip` utility.
+///
+/// Returns the name of the extracted file, or the exit code to use for the process:
+/// the one of `unzip` when it fails, 127 when its output mentions no extracted file.
+async fn unzip(archive_path: &Path, overwrite: bool) -> Result<String, i32> {
+    let overwrite_option = if overwrite { "-o" } else { "-n" };
+
+    let process = Command::new("unzip")
+        .arg(overwrite_option)
+        .arg(archive_path)
+        .output()
+        .await
+        .expect("Can't spawn unzip process");
+
+    if !process.status.success() {
+        // No exit code is available when unzip has been terminated by a signal.
+        return Err(process.status.code().unwrap_or(1));
+    }
+
+    find_extracted_file(process.stdout).ok_or(127)
+}
+
+/// Finds in unzip output the name of the first inflated or extracted file.
+///
+/// Returns `None` if no such file is mentioned or if the output isn't valid UTF-8.
+fn find_extracted_file(stdout: Vec<u8>) -> Option<String> {
+    let output = String::from_utf8(stdout).ok()?;
+
+    // lines() handles both \n and \r\n, and a last line without line ending.
+    output.lines().find_map(|line| {
+        ["inflating: ", "extracting: "]
+            .iter()
+            .find_map(|action| line.split_once(*action))
+            .map(|(_, filename)| filename.trim().to_string())
+    })
+}
+
+/// Gets from the dataset attachments the metadata of the most recent FANTOIR file.
+///
+/// Panics if the metadata of a FANTOIR attachment can't be parsed, or if there is none.
+pub async fn get_last_file_information () -> FantoirFile {
+    let endpoint = ExploreApiEndPoint::new(ENDPOINT);
+    let result = endpoint.get_dataset_attachments(DATASET_ID).await;
+
+    result
+        .attachments
+        .into_iter()
+        .filter(|attachment| attachment.metas.title.starts_with("Fichier national FANTOIR"))
+        .map(|attachment| FantoirFile::from(&attachment).expect("Can't parse FANTOIR file metadata"))
+        .max() // The most recent
+        .expect("No FANTOIR file found in the dataset attachments")
+}
diff --git a/fantoir-datasource/src/commands/fetch/os.rs b/fantoir-datasource/src/commands/fetch/os.rs
new file mode 100644
index 0000000..bfa406a
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/os.rs
@@ -0,0 +1,31 @@
+//! 
OS-related helper methods + +use std::env::consts::OS; +use std::process::{Command, Stdio}; + +pub fn is_command_available (command: &str) -> bool { + let command_to_use = match OS { + "windows" => "where", + _ => "which", // command -v is sometimes recommended, but doesn't exist as standalone + }; + + // Use the exit code to determine if the command has been found + Command::new(command_to_use) + .arg(command) + .stdout(Stdio::null()) // Discard both stdout and stderr + .stderr(Stdio::null()) + .status() + .expect("failed to execute process") + .success() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + pub fn test_is_command_available () { + assert!(is_command_available("unzip")); + assert!(!is_command_available("notexisting")); + } +} diff --git a/fantoir-datasource/src/commands/mod.rs b/fantoir-datasource/src/commands/mod.rs index f4d1c80..168ea75 100644 --- a/fantoir-datasource/src/commands/mod.rs +++ b/fantoir-datasource/src/commands/mod.rs @@ -1,6 +1,7 @@ //! Commands for the fantoir-datasource tool. +pub(crate) mod fetch; pub(crate) mod import; pub(crate) mod promote; pub(crate) mod query; pub(crate) mod wikidata; diff --git a/fantoir-datasource/src/fantoir.rs b/fantoir-datasource/src/fantoir.rs index 3dbcb48..daed379 100644 --- a/fantoir-datasource/src/fantoir.rs +++ b/fantoir-datasource/src/fantoir.rs @@ -1,332 +1,332 @@ //! # Helper methods for FANTOIR database. //! //! This module offers a structure for a FANTOIR record, methods to parse the file and export it. //! Database functions expect to work with an executor from sqlx crate. +use chrono::NaiveDate; use lazy_static::lazy_static; use sqlx::PgPool; -use sqlx::types::chrono::NaiveDate; lazy_static! { static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"]; /// The alphabet without I O and Q. 
static ref RIVOLI_STRING: Vec = vec![ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' ]; } /// A voie in the FANTOIR database #[derive(Debug)] pub struct FantoirEntry { /* Identifiers */ code_fantoir: String, /* Part 1 - commune */ departement: String, // Generally an integer, but INSEE uses 2A and 2B for Corse code_commune: i32, code_insee: String, // Afa in Corse has 2A001 type_commune: Option, is_pseudo_recensee: bool, /* Part 2 - voie */ identifiant_communal_voie: String, cle_rivoli: String, code_nature_voie: Option, libelle_voie: String, type_voie: i32, // 1: voie, 2: ens. immo, 3: lieu-dit, 4: pseudo-voie, 5: provisoire is_public: bool, /* Part 3 - population */ is_large: bool, population_a_part: i32, population_fictive: i32, /* Part 4 - metadata */ is_cancelled: bool, cancel_date: Option, creation_date: Option, code_majic: i32, last_alpha_word: String, } impl FantoirEntry { pub fn parse_line(line: &str) -> Self { let departement = match &line[0..2] { "97" => String::from(&line[0..3]), // include for DOM/TOM the next digit department => String::from(department), }; let len = line.len(); Self { /* Identifier */ code_fantoir: String::from(&line[0..11]), /* Part 1 - commune */ departement, code_commune: line[3..6].parse().expect("Can't parse code commune"), code_insee: format!("{:02}{:03}", &line[0..2], &line[3..6]), type_commune: parse_optional_string(&line[43..44]), is_pseudo_recensee: &line[45..46] == "3", /* Part 2 - voie */ identifiant_communal_voie: String::from(&line[6..10]), cle_rivoli: String::from(&line[10..11]), code_nature_voie: parse_optional_string(&line[11..15]), libelle_voie: String::from(line[15..41].trim()), type_voie: line[108..109].parse().expect("Can't parse type de voie."), is_public: &line[48..49] == "0", /* Part 3 - population */ is_large: &line[49..50] == "*", population_a_part: line[59..66].parse().expect("Can't parse population à part"), population_fictive: 
line[66..73].parse().expect("Can't parse population fictive"), /* Part 4 - metadata */ is_cancelled: &line[73..74] != " ", cancel_date: parse_fantoir_date(&line[74..81]), creation_date: parse_fantoir_date(&line[81..88]), code_majic: line[103..108].parse().expect("Can't parse MAJIC"), last_alpha_word: String::from(&line[112..len]), } } pub async fn insert_to_db(&self, pool: &PgPool, table: &str) { let mut query = format!("INSERT INTO {}", table); query.push_str( r#" (code_fantoir, departement, code_commune, code_insee, type_commune, is_pseudo_recensee, identifiant_communal_voie, cle_rivoli, code_nature_voie, libelle_voie, type_voie, is_public, is_large, population_a_part, population_fictive, is_cancelled, cancel_date, creation_date, code_majic, last_alpha_word ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20 )"# ); sqlx::query(&query) /* Identifiers */ .bind(&self.code_fantoir) /* Part 1 - commune */ .bind(&self.departement) .bind(&self.code_commune) .bind(&self.code_insee) .bind(&self.type_commune) .bind(&self.is_pseudo_recensee) /* Part 2 - Voie */ .bind(&self.identifiant_communal_voie) .bind(&self.cle_rivoli) .bind(&self.code_nature_voie) .bind(&self.libelle_voie) .bind(&self.type_voie) .bind(&self.is_public) /* Part 3 - Population */ .bind(&self.is_large) .bind(&self.population_a_part) .bind(&self.population_fictive) /* Part 4 - Metadata */ .bind(&self.is_cancelled) .bind(&self.cancel_date) .bind(&self.creation_date) .bind(&self.code_majic) .bind(&self.last_alpha_word) .execute(pool) .await .expect("Can't insert entry to database"); } } pub fn parse_fantoir_date (date: &str) -> Option { if date == "0000000" { return None; } let year = date[0..4].parse().expect("Can't parse date: year part"); let ord = date[4..7].parse().expect("Can't parse date: ordinal part"); NaiveDate::from_yo_opt(year, ord) } fn parse_optional_string (expression: &str) -> Option { let expression = expression.trim(); if expression.len() > 0 { 
Some(String::from(expression)) } else { None } } /// A fixed FANTOIR code result #[derive(Debug, Eq, PartialEq)] pub enum FixedFantoirCode { /// The code has been fully computed Computed(String), /// Information needed to query the code has been extracted, but code direction is unknown /// Such result can be queried through search_code_fantoir() ToSearch { code_insee: String, identifiant_communal_voie: String }, } /// Transforms FANTOIR code from BAN into regular FANTOIR codes. /// BAN sometimes uses _ without Rivoli key. pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode { let mut code = code.to_string(); if code.contains("_") { // 97231_B026 -> 972231B026 code = if code.starts_with("97") { // Code direction = department last digit format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..]) } else if uses_specific_code_direction(&code) { // We can't fix it by computation, we need to search it in the database return FixedFantoirCode::ToSearch { code_insee: code[0..5].to_string(), identifiant_communal_voie: code[6..10].to_string(), } } else { // Code direction = 0 format!("{}0{}{}", &code[0..=2], &code[3..5], &code[6..]) }; } if code.len() == 10 { let last_char = code.chars().last().unwrap(); match last_char { '0'..='9' => { code.push(compute_rivoli_key(&code)); } 'A'..='Z' => { // 441090516U -> 4401090516U code = if uses_specific_code_direction(&code) { // We can't fix it by computation, we need to search it in the database // 920514135A -> 92051 4135 return FixedFantoirCode::ToSearch { code_insee: code[0..5].to_string(), identifiant_communal_voie: code[5..9].to_string(), } } else { format!("{}0{}", &code[0..2], &code[2..]) }; } _ => unreachable!(), } } FixedFantoirCode::Computed(code) } pub fn uses_specific_code_direction (code: &str) -> bool { DEPARTMENTS_WITH_CODE_DIRECTION .iter() .any(|&dpt| code.starts_with(dpt)) } pub fn compute_rivoli_key (code: &str) -> char { // See https://georezo.net/forum/viewtopic.php?id=102292 if code.starts_with("2A") || 
code.starts_with("2B") { // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19? unimplemented!() } let part_commune: i32 = code[0..6].parse().unwrap(); let type_voie = code.chars().nth(6).unwrap(); let type_voie = if type_voie.is_alphabetic() { type_voie as u32 - 55 } else { type_voie.to_digit(10).unwrap() }; let numero_identifiant_communal_voie: i32 = code[7..].parse().unwrap(); let index = (part_commune * 19 + type_voie as i32 * 11 + numero_identifiant_communal_voie) % 23; return RIVOLI_STRING[index as usize]; } #[cfg(test)] mod tests { // Note this useful idiom: importing names from outer (for mod tests) scope. use super::*; #[test] fn test_parse_fantoir_date() { let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap(); let actual = parse_fantoir_date("1987001").unwrap(); assert_eq!(expected, actual); } #[test] fn test_parse_optional_string() { assert_eq!(Some(String::from("quux")), parse_optional_string("quux")); } #[test] fn test_parse_optional_string_with_trailing_spaces() { assert_eq!(Some(String::from("quux")), parse_optional_string("quux ")); } #[test] fn test_parse_optional_string_when_empty() { assert_eq!(true, parse_optional_string("").is_none()); } #[test] fn test_parse_optional_string_when_only_spaces() { assert_eq!(true, parse_optional_string(" ").is_none()); } #[test] pub fn test_fix_fantoir_code () { assert_fixed_fantoir_code("755112P144L", fix_fantoir_code("755112P144L")); assert_fixed_fantoir_code("972231B026U", fix_fantoir_code("97231_B026")); assert_fixed_fantoir_code("4401090516U", fix_fantoir_code("441090516U")); assert_fixed_fantoir_code("972222B305L", fix_fantoir_code("972222B305")); } fn assert_fixed_fantoir_code (expected: &str, actual: FixedFantoirCode) { match actual { FixedFantoirCode::Computed(code) => { assert_eq!(expected, &code); }, _ => assert!(false, "Expected a computed FANTOIR code") } } #[test] pub fn test_fix_fantoir_code_when_it_cannot_be_computed () { let expected = 
FixedFantoirCode::ToSearch { code_insee: "92002".to_string(), identifiant_communal_voie: "5130".to_string() }; assert_eq!(expected, fix_fantoir_code("920025130X"), "As code direction can't be computed, this code should be to search"); assert_eq!(expected, fix_fantoir_code("92002_5130"), "As code direction can't be computed, this code should be to search"); } #[test] pub fn test_compute_rivoli_key() { assert_eq!('W', compute_rivoli_key("380003B001")); assert_eq!('U', compute_rivoli_key("972231B026")); } #[test] pub fn test_compute_rivoli_key_with_type_voie_zero() { assert_eq!('C', compute_rivoli_key("9722230261")); } } diff --git a/fantoir-datasource/src/main.rs b/fantoir-datasource/src/main.rs index ce86816..364c97e 100644 --- a/fantoir-datasource/src/main.rs +++ b/fantoir-datasource/src/main.rs @@ -1,107 +1,120 @@ use std::env; use clap::{Args, Parser}; use crate::commands::promote::promote; mod commands; mod db; mod fantoir; mod services; #[derive(Debug, Parser)] #[command(name = "fantoir-datasource")] #[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)] enum FantoirCommand { + /// Fetch the last version of the FANTOIR file + Fetch(FetchArgs), + /// Import from FANTOIR file generated by the DGFIP #[command(arg_required_else_help = true)] Import(ImportArgs), /// Promote an imported FANTOIR table as the current FANTOIR table to use #[command(arg_required_else_help = true)] Promote(PromoteArgs), /// Query Wikidata SPARQL end-point to enrich FANTOIR information Wikidata(WikidataArgs), /// Query the imported FANTOIR table Query(QueryArgs) } +#[derive(Debug, Args)] +pub struct FetchArgs { + /// Overwrite file if already existing + #[arg(long)] + overwrite: bool, +} + #[derive(Debug, Args)] pub struct ImportArgs { /// Create table if it doesn't exist #[arg(short = 'c')] create_table: bool, /// Truncate table if it already exists, allowing the overwrite mode. 
/// If not specified, the script will fail if table exists. #[arg(short = 't')] overwrite_table: bool, /// The FANTOIR file to import fantoir_file: String, /// The name of the table to populate fantoir_table: String, } #[derive(Debug, Args)] pub struct PromoteArgs { /// The name of the table to promote fantoir_table: String, } #[derive(Debug, Args)] pub struct WikidataArgs { /// Create table if it doesn't exist #[arg(short = 'c')] create_table: bool, /// Truncate table if it already exists, allowing the overwrite mode. /// If not specified, the script will fail if table exists. #[arg(short = 't')] overwrite_table: bool, /// Generate a Wikidata maintenance report instead to print errors to stderr #[arg(long)] maintenance_report: bool, } #[derive(Debug, Args)] #[clap(trailing_var_arg=true)] pub struct QueryArgs { /// INSEE code to identify a commune #[arg(long)] code_insee: Option, /// Identifier of the voie by the commune #[arg(long)] code_voie: Option, /// Expression to search libelle: Vec, } #[tokio::main] async fn main() { let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided. 
-    let database_url = env::var("DATABASE_URL")
-        .expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
- 
+    // Evaluated lazily: the fetch command must work without any database configured.
+    let database_url = || env::var("DATABASE_URL")
+        .expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
     match command {
+        FantoirCommand::Fetch(args) => {
+            commands::fetch::fetch(args.overwrite).await;
+        },
         FantoirCommand::Import(args) => {
-            commands::import::import(&args, &database_url).await;
+            commands::import::import(&args, &database_url()).await;
         },
         FantoirCommand::Promote(args) => {
-            promote(&args.fantoir_table, &database_url).await;
+            promote(&args.fantoir_table, &database_url()).await;
         },
         FantoirCommand::Wikidata(args) => {
-            commands::wikidata::import(&args, &database_url).await
+            commands::wikidata::import(&args, &database_url()).await
         },
         FantoirCommand::Query(args) => {
-            commands::query::search(args, &database_url).await
+            commands::query::search(args, &database_url()).await
         },
     };
 }
diff --git a/fantoir-datasource/src/services/http_client.rs b/fantoir-datasource/src/services/http_client.rs
index 7eef561..9e7b546 100644
--- a/fantoir-datasource/src/services/http_client.rs
+++ b/fantoir-datasource/src/services/http_client.rs
@@ -1,56 +1,116 @@
-use lazy_static::lazy_static;
+use std::io::Error as IOError;
+use std::path::Path;
 
-use reqwest::{Client as ReqwestClient, ClientBuilder, Error, IntoUrl, Response};
+use lazy_static::lazy_static;
+use reqwest::Client as ReqwestClient;
+use reqwest::ClientBuilder;
+use reqwest::Error as ReqwestError;
+use reqwest::IntoUrl;
+use reqwest::Response;
 use reqwest::header::HeaderMap;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 
 /*   -------------------------------------------------------------
      User agent
 
      The USER_AGENT variable is computed at build time.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
 
 lazy_static! {
     pub static ref USER_AGENT: String = format!(
         "{}/{} (https://databases.nasqueron.org/)",
         env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
     );
 }
 
 pub fn get_user_agent () -> &'static str {
     &USER_AGENT
 }
 
 /*   -------------------------------------------------------------
      HTTP client
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
 
 pub struct Client {
     client: ReqwestClient,
 }
 
 impl Client {
     pub fn new(headers: Option<HeaderMap>) -> Self {
         let headers = headers
             .unwrap_or(HeaderMap::new());
 
         let client = ClientBuilder::new()
             .user_agent(get_user_agent())
             .default_headers(headers)
             .gzip(true)
             .deflate(true)
             .build()
             .expect("Can't build HTTP client");
 
         Self {
             client,
         }
     }
 
     pub async fn get<T>(&self, url: T) -> Result<Response, Error>
     where T: IntoUrl {
         self.client
             .get(url)
             .send()
             .await
+            .map_err(Error::Reqwest)
     }
+
+    /// Downloads the resource at `url` into the file `target_path`, created or truncated.
+    ///
+    /// Returns the number of bytes written, or an error if the request fails,
+    /// if the server answers with an HTTP error status or if the file can't be written.
+    pub async fn download<T, P>(&self, url: T, target_path: P) -> Result<usize, Error>
+    where T: IntoUrl, P: AsRef<Path> {
+        // Request first, so a failed request doesn't leave an empty file behind.
+        let mut target_content = self.get(url)
+            .await?
+            .error_for_status()
+            .map_err(Error::Reqwest)?;
+
+        let mut file = File::create(target_path)
+            .await
+            .map_err(Error::IO)?;
+
+        let mut bytes_written = 0;
+        while let Some(chunk) = target_content
+            .chunk()
+            .await
+            .map_err(Error::Reqwest)?
+        {
+            // write() is allowed to write only a part of the buffer, write_all() isn't.
+            file.write_all(chunk.as_ref())
+                .await
+                .map_err(Error::IO)?;
+            bytes_written += chunk.len();
+        }
+
+        // Tokio buffers file writes: flush, so the caller can read the complete file.
+        file.flush()
+            .await
+            .map_err(Error::IO)?;
+
+        Ok(bytes_written)
+    }
+}
+
+/*   -------------------------------------------------------------
+     HTTP client error
+    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
+
+/// HTTP client error
+#[derive(Debug)]
+pub enum Error {
+    /// Represents an underlying error from Reqwest HTTP client when processing a request.
+    Reqwest(ReqwestError),
+
+    /// Represents an IO error when doing file operations.
+    IO(IOError),
 }