Page MenuHomeDevCentral

D2738.id6966.diff
No OneTemporary

D2738.id6966.diff

diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
/target
/FANTOIR*
+!/fantoir-datasource
Cargo.lock
diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -9,7 +9,9 @@
license = "BSD-2-Clause"
[dependencies]
+chrono = "~0.4.23"
lazy_static = "~1.4.0"
+opendatasoft-explore-api = { path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.1"
sparesults = "~0.1.3"
diff --git a/fantoir-datasource/README.md b/fantoir-datasource/README.md
--- a/fantoir-datasource/README.md
+++ b/fantoir-datasource/README.md
@@ -6,6 +6,12 @@
export DATABASE_URL="postgres://fantoir:fantoir@localhost/fantoir"
```
+## Requirements
+
+A PostgreSQL server is required, at least for the import, wikidata and promote commands.
+
+If you wish to use the tool to fetch the FANTOIR database, the `unzip` utility must be installed and available in your PATH.
+
## Development
### Build instructions
diff --git a/fantoir-datasource/src/commands/fetch/fantoir_file.rs b/fantoir-datasource/src/commands/fetch/fantoir_file.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/fantoir_file.rs
@@ -0,0 +1,114 @@
+use std::cmp::Ordering;
+use std::path::Path;
+
+use opendatasoft_explore_api::schema::Attachment;
+
+use chrono::Datelike;
+use chrono::Months;
+use chrono::NaiveDate;
+
+/* -------------------------------------------------------------
+ FANTOIR file metadata
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Metadata about a FANTOIR file: its URL and its production month.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct FantoirFile {
+    /// URL of the file
+    pub url: String,
+
+    /// The month of FANTOIR file production
+    pub date: NaiveDate,
+}
+
+impl FantoirFile {
+    /// Builds the file metadata from a dataset attachment.
+    ///
+    /// The attachment identifier is expected to look like
+    /// `fichier_national_fantoir_situation_<french month>_<year>_zip`.
+    /// Returns `None` when no production date can be parsed from it.
+    pub fn from (attachment: &Attachment) -> Option<Self> {
+        let id_date = attachment.metas.id
+            .replace("fichier_national_fantoir_situation_", "")
+            .replace("_zip", "");
+
+        Some(Self {
+            url: attachment.href.clone(),
+            date: parse_fantoir_date(&id_date)?,
+        })
+    }
+
+    /// Lists the names the extracted file can have, as `FANTOIR<MMYY>`.
+    ///
+    /// Two candidates are returned: one for the month before the production
+    /// date, then one for the production month itself.
+    pub fn get_file_candidates(&self) -> Vec<String> {
+        let previous_month = self.date - Months::new(1);
+
+        [previous_month, self.date]
+            .iter()
+            // Zero-pad both parts: January 2023 is FANTOIR0123, not FANTOIR123.
+            .map(|date| format!("FANTOIR{:02}{:02}", date.month(), date.year() % 100))
+            .collect()
+    }
+
+    /// Checks if one of the candidate files exists, relative to the current directory.
+    pub fn exists_locally(&self) -> bool {
+        self.get_file_candidates()
+            .iter()
+            .any(|candidate| Path::new(candidate).is_file())
+    }
+}
+
+impl PartialOrd for FantoirFile {
+    /// Delegates to the total order defined by the `Ord` implementation.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let ordering = Ord::cmp(self, other);
+        Some(ordering)
+    }
+}
+
+impl Ord for FantoirFile {
+    /// Orders files by production date only; the URL is not considered.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let (ours, theirs) = (&self.date, &other.date);
+        ours.cmp(theirs)
+    }
+}
+
+/// Parses a `<french month>_<year>` string into the first day of that month.
+///
+/// Returns `None` when the string doesn't have exactly that shape,
+/// or when the year or the month name can't be parsed.
+fn parse_fantoir_date(id_date: &str) -> Option<NaiveDate> {
+    // Any extra underscore ends up in the year part, which then fails to parse.
+    let (month_name, year) = id_date.split_once('_')?;
+
+    let year = year.parse().ok()?;
+    let month = parse_french_month_long_name(month_name)?;
+
+    NaiveDate::from_ymd_opt(year, month, 1)
+}
+
+/// Converts a French month name, lowercase and without accents, to its number (1-12).
+///
+/// Returns `None` for any other string.
+fn parse_french_month_long_name(month: &str) -> Option<u32> {
+    const MONTHS: [&str; 12] = [
+        "janvier", "fevrier", "mars", "avril", "mai", "juin",
+        "juillet", "aout", "septembre", "octobre", "novembre", "decembre",
+    ];
+
+    MONTHS
+        .iter()
+        .zip(1u32..) // Pair each name with its month number
+        .find(|(name, _)| **name == month)
+        .map(|(_, number)| number)
+}
+
+#[cfg(test)]
+mod tests {
+    use chrono::NaiveDate;
+    use super::*;
+
+    /// A `<month>_<year>` identifier is parsed as the first day of that month.
+    #[test]
+    fn test_parse_fantoir_date() {
+        let parsed = parse_fantoir_date("novembre_2022");
+        let expected = NaiveDate::from_ymd_opt(2022, 11, 1).unwrap();
+
+        assert_eq!(Some(expected), parsed);
+    }
+
+    /// Candidates cover the previous month, then the production month.
+    #[test]
+    fn test_get_file_candidates() {
+        let date = NaiveDate::from_ymd_opt(2022, 11, 1).unwrap();
+        let url = "foo/fichier_national_fantoir_situation_novembre_2022_zip".to_string();
+        let file = FantoirFile { url, date };
+
+        let candidates = file.get_file_candidates();
+
+        let expected = vec![String::from("FANTOIR1022"), String::from("FANTOIR1122")];
+        assert_eq!(expected, candidates);
+    }
+}
diff --git a/fantoir-datasource/src/commands/fetch/mod.rs b/fantoir-datasource/src/commands/fetch/mod.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/mod.rs
@@ -0,0 +1,124 @@
+//! Fetch command for the fantoir-datasource tool.
+//!
+//! Checks the last published version, then downloads and extracts it if needed.
+
+use std::env;
+use std::path::PathBuf;
+use std::process::exit;
+
+use chrono::Utc;
+use tokio::fs::remove_file;
+use opendatasoft_explore_api::requests::ExploreApiEndPoint;
+use tokio::process::Command;
+
+use crate::commands::fetch::fantoir_file::FantoirFile;
+use crate::commands::fetch::os::is_command_available;
+use crate::services::http_client::Client as HttpClient;
+
+mod fantoir_file;
+mod os;
+
+/// Base URL of the Explore API queried for the dataset.
+static ENDPOINT: &str = "https://data.economie.gouv.fr/api/v2";
+
+/// Identifier of the dataset whose attachments are listed.
+static DATASET_ID: &str = "fichier-fantoir-des-voies-et-lieux-dits";
+
+/// Fetches the last version of the FANTOIR file and extracts it.
+///
+/// On success, prints `FANTOIR_FILE=<extracted file>` on stdout.
+///
+/// This function always terminates the process, with one of these exit codes:
+///   - 0 when the file has been downloaded and extracted
+///   - 12 when the file already exists locally and `overwrite` is false
+///   - 16 when the archive can't be downloaded
+///   - 32 when the `unzip` utility isn't available
+///   - otherwise, the error code reported by the extraction step
+pub async fn fetch (overwrite: bool) {
+    let fantoir_file = get_last_file_information().await;
+
+    let file_exists = fantoir_file.exists_locally();
+    if file_exists && !overwrite {
+        eprintln!("FANTOIR file already exists. Run with --overwrite to overwrite it.");
+        exit(12);
+    }
+
+    if !is_command_available("unzip") {
+        eprintln!("No 'unzip' utility has been found, please install it or fix PATH if needed.");
+        exit(32);
+    }
+
+    let target_path = get_fantoir_zip_path();
+    if let Err(error) = HttpClient::new(None).download(&fantoir_file.url, &target_path).await {
+        eprintln!("Can't download FANTOIR file: {:?}", error);
+        exit(16);
+    }
+
+    let exit_code = match unzip(&target_path, overwrite).await {
+        Ok(path) => {
+            println!("FANTOIR_FILE={}", &path);
+
+            0
+        }
+        Err(exit_code) => exit_code,
+    };
+
+    // The archive is only a temporary file: clean it up whatever the extraction result.
+    if let Err(error) = remove_file(&target_path).await {
+        eprintln!("Can't remove downloaded temporary file: {}", error);
+        // display() can't panic, even if the path isn't valid UTF-8.
+        eprintln!("Please delete manually {}", target_path.display());
+    }
+
+    exit(exit_code);
+}
+
+/// Determines a temporary location where to save the FANTOIR file ZIP archive
+///
+/// The file name embeds the current UTC timestamp and is located
+/// in the system temporary directory.
+fn get_fantoir_zip_path() -> PathBuf {
+    let timestamp = Utc::now().timestamp();
+
+    let mut path = env::temp_dir();
+    path.push(format!("fantoir-download-{}.zip", timestamp));
+
+    path
+}
+
+/// Extracts the archive with the `unzip` utility.
+///
+/// Existing files are overwritten (`-o`) or kept (`-n`) according to `overwrite`.
+///
+/// Returns the name of the extracted file, or an exit code on failure:
+/// the one reported by unzip, 1 if unzip terminated without an exit code,
+/// or 127 if no extracted file could be found in unzip output.
+///
+/// # Panics
+///
+/// Panics if the unzip process can't be spawned.
+async fn unzip(archive_path: &PathBuf, overwrite: bool) -> Result<String, i32> {
+    let overwrite_option = if overwrite { "-o" } else { "-n" };
+
+    let process = Command::new("unzip")
+        .arg(overwrite_option)
+        // Pass the path as an OS string: it doesn't need to be valid UTF-8.
+        .arg(archive_path)
+        .output()
+        .await
+        .expect("Can't spawn unzip process");
+
+    if !process.status.success() {
+        // code() is None when the process didn't exit normally, e.g. killed by a signal.
+        return Err(process.status.code().unwrap_or(1));
+    }
+
+    find_extracted_file(process.stdout).ok_or(127)
+}
+
+/// Finds the name of the first extracted file in the unzip output.
+///
+/// Looks for an `inflating: ` entry first, then for an `extracting: ` one,
+/// and returns the rest of that line, trimmed. Returns `None` when the
+/// output contains neither.
+///
+/// Bytes that aren't valid UTF-8 are replaced instead of causing a panic.
+fn find_extracted_file(stdout: Vec<u8>) -> Option<String> {
+    let output = String::from_utf8_lossy(&stdout);
+
+    ["inflating: ", "extracting: "]
+        .iter()
+        .find_map(|action| {
+            let start = output.find(action)? + action.len();
+
+            // lines() handles both \n and \r\n, and a last line without any newline.
+            let line = output[start..].lines().next().unwrap_or("");
+
+            Some(line.trim().to_string())
+        })
+}
+
+/// Queries the dataset attachments and returns the most recent FANTOIR file.
+///
+/// Only attachments whose title starts with "Fichier national FANTOIR" are considered.
+///
+/// # Panics
+///
+/// Panics if a matching attachment has metadata that can't be parsed,
+/// or if no attachment matches at all.
+pub async fn get_last_file_information () -> FantoirFile {
+    let endpoint = ExploreApiEndPoint::new(ENDPOINT);
+    let result = endpoint.get_dataset_attachments(DATASET_ID).await;
+
+    result
+        .attachments
+        .into_iter()
+        .filter(|attachment| attachment.metas.title.starts_with("Fichier national FANTOIR"))
+        .map(|attachment| FantoirFile::from(&attachment).expect("Can't parse FANTOIR file metadata"))
+        .max() // The most recent
+        .expect("No FANTOIR file found in the dataset attachments")
+}
diff --git a/fantoir-datasource/src/commands/fetch/os.rs b/fantoir-datasource/src/commands/fetch/os.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/os.rs
@@ -0,0 +1,31 @@
+//! OS-related helper methods
+
+use std::env::consts::OS;
+use std::process::{Command, Stdio};
+
+/// Determines if a command can be located by the OS lookup tool.
+///
+/// Relies on `where` on Windows and on `which` elsewhere. If the lookup tool
+/// itself can't be run, the command is reported as not available instead of
+/// panicking.
+pub fn is_command_available (command: &str) -> bool {
+    let command_to_use = match OS {
+        "windows" => "where",
+        _ => "which", // command -v is sometimes recommended, but doesn't exist as standalone
+    };
+
+    Command::new(command_to_use)
+        .arg(command)
+        .stdout(Stdio::null()) // Discard both stdout and stderr
+        .stderr(Stdio::null())
+        .status()
+        // Use the exit code to determine if the command has been found
+        .map(|status| status.success())
+        // The lookup tool couldn't be spawned: nothing can be confirmed as available
+        .unwrap_or(false)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Requires `unzip` to be installed on the machine running the tests.
+    #[test]
+    pub fn test_is_command_available () {
+        let unzip_found = is_command_available("unzip");
+        assert!(unzip_found);
+
+        let bogus_found = is_command_available("notexisting");
+        assert!(!bogus_found);
+    }
+}
diff --git a/fantoir-datasource/src/commands/mod.rs b/fantoir-datasource/src/commands/mod.rs
--- a/fantoir-datasource/src/commands/mod.rs
+++ b/fantoir-datasource/src/commands/mod.rs
@@ -1,5 +1,6 @@
//! Commands for the fantoir-datasource tool.
+pub(crate) mod fetch;
pub(crate) mod import;
pub(crate) mod promote;
pub(crate) mod query;
diff --git a/fantoir-datasource/src/fantoir.rs b/fantoir-datasource/src/fantoir.rs
--- a/fantoir-datasource/src/fantoir.rs
+++ b/fantoir-datasource/src/fantoir.rs
@@ -3,9 +3,9 @@
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
+use chrono::NaiveDate;
use lazy_static::lazy_static;
use sqlx::PgPool;
-use sqlx::types::chrono::NaiveDate;
lazy_static! {
static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
diff --git a/fantoir-datasource/src/main.rs b/fantoir-datasource/src/main.rs
--- a/fantoir-datasource/src/main.rs
+++ b/fantoir-datasource/src/main.rs
@@ -13,6 +13,9 @@
#[command(name = "fantoir-datasource")]
#[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)]
enum FantoirCommand {
+ /// Fetch the last version of the FANTOIR file
+ Fetch(FetchArgs),
+
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
@@ -28,6 +31,13 @@
Query(QueryArgs)
}
+// Command-line arguments accepted by the fetch subcommand.
+#[derive(Debug, Args)]
+pub struct FetchArgs {
+ /// Overwrite file if already existing
+ #[arg(long)]
+ // Forwarded as is to commands::fetch::fetch
+ overwrite: bool,
+}
+
#[derive(Debug, Args)]
pub struct ImportArgs {
/// Create table if it doesn't exist
@@ -91,6 +101,9 @@
.expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
match command {
+ FantoirCommand::Fetch(args) => {
+ commands::fetch::fetch(args.overwrite).await;
+ },
FantoirCommand::Import(args) => {
commands::import::import(&args, &database_url).await;
},
diff --git a/fantoir-datasource/src/services/http_client.rs b/fantoir-datasource/src/services/http_client.rs
--- a/fantoir-datasource/src/services/http_client.rs
+++ b/fantoir-datasource/src/services/http_client.rs
@@ -1,7 +1,15 @@
-use lazy_static::lazy_static;
+use std::io::Error as IOError;
+use std::path::Path;
-use reqwest::{Client as ReqwestClient, ClientBuilder, Error, IntoUrl, Response};
+use lazy_static::lazy_static;
+use reqwest::Client as ReqwestClient;
+use reqwest::ClientBuilder;
+use reqwest::Error as ReqwestError;
+use reqwest::IntoUrl;
+use reqwest::Response;
use reqwest::header::HeaderMap;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
/* -------------------------------------------------------------
User agent
@@ -52,5 +60,41 @@
.get(url)
.send()
.await
+ .map_err(|error| Error::Reqwest(error))
}
+
+    /// Downloads the content of `url` into the file at `target_path`.
+    ///
+    /// The target file is created, or truncated if it already exists,
+    /// once the server has answered successfully.
+    /// Returns the number of bytes written to the file.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::Reqwest` when the request fails, the server answers
+    /// with an error status or the body can't be read, and `Error::IO` when
+    /// the file can't be created or written.
+    pub async fn download<P, T>(&self, url: T, target_path: P) -> Result<usize, Error>
+        where T: IntoUrl, P: AsRef<Path> {
+        // Don't save an HTTP error page as if it were the requested file.
+        let mut target_content = self.get(url).await?
+            .error_for_status()
+            .map_err(Error::Reqwest)?;
+
+        let mut file = File::create(target_path)
+            .await
+            .map_err(Error::IO)?;
+
+        let mut bytes_written = 0;
+        while let Some(chunk) = target_content
+            .chunk()
+            .await
+            .map_err(Error::Reqwest)?
+        {
+            // write() may only write a part of the chunk; write_all() writes all of it.
+            file.write_all(&chunk)
+                .await
+                .map_err(Error::IO)?;
+            bytes_written += chunk.len();
+        }
+
+        // Ensure pending writes are completed before the file is dropped.
+        file.flush()
+            .await
+            .map_err(Error::IO)?;
+
+        Ok(bytes_written)
+    }
+}
+
+/* -------------------------------------------------------------
+ HTTP client error
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// HTTP client error
+///
+/// Implements `Display` and `std::error::Error`, the underlying error
+/// being exposed through `source()`.
+#[derive(Debug)]
+pub enum Error {
+    /// Represents an underlying error from Reqwest HTTP client when processing a request.
+    Reqwest(ReqwestError),
+
+    /// Represents an IO error when doing file operations.
+    IO(IOError),
}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Reqwest(error) => write!(f, "HTTP request error: {}", error),
+            Self::IO(error) => write!(f, "I/O error: {}", error),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Self::Reqwest(error) => Some(error),
+            Self::IO(error) => Some(error),
+        }
+    }
+}

File Metadata

Mime Type
text/plain
Expires
Sun, Jan 19, 16:49 (11 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2361222
Default Alt Text
D2738.id6966.diff (13 KB)

Event Timeline