Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3755065
D2738.id6979.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Referenced Files
None
Subscribers
None
D2738.id6979.diff
View Options
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
/target
/FANTOIR*
+!/fantoir-datasource
Cargo.lock
diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -9,7 +9,9 @@
license = "BSD-2-Clause"
[dependencies]
+chrono = "~0.4.23"
lazy_static = "~1.4.0"
+opendatasoft-explore-api = { version = "0.1.0", path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.1"
sparesults = "~0.1.3"
diff --git a/fantoir-datasource/README.md b/fantoir-datasource/README.md
--- a/fantoir-datasource/README.md
+++ b/fantoir-datasource/README.md
@@ -6,6 +6,12 @@
export DATABASE_URL="postgres://fantoir:fantoir@localhost/fantoir"
```
+## Requirements
+
+A PostgreSQL server, at least for import/wikidata/promote commands.
+
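+If you wish to use the tool to fetch the FANTOIR database, the `unzip` utility must be installed and available in your PATH: the fetch command uses it to extract the downloaded archive.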
+
## Development
### Build instructions
diff --git a/fantoir-datasource/src/commands/fetch/fantoir_file.rs b/fantoir-datasource/src/commands/fetch/fantoir_file.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/fantoir_file.rs
@@ -0,0 +1,114 @@
+use std::cmp::Ordering;
+use std::path::Path;
+
+use opendatasoft_explore_api::schema::Attachment;
+
+use chrono::Datelike;
+use chrono::Months;
+use chrono::NaiveDate;
+
+/* -------------------------------------------------------------
+ FANTOIR file metadata
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Metadata about a FANTOIR file published as a dataset attachment:
+/// where to download it and which month it was produced for.
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+pub struct FantoirFile {
+ /// The location the file can be downloaded from
+ pub url: String,
+
+ /// The month of FANTOIR file production
+ pub date: NaiveDate,
+}
+
+impl FantoirFile {
+    /// Builds the file metadata from a dataset attachment.
+    ///
+    /// The date is read from the attachment id, once the
+    /// `fichier_national_fantoir_situation_` and `_zip` fragments are removed;
+    /// what remains must be `<month in French>_<year>`.
+    ///
+    /// Returns `None` when that date can't be parsed.
+    pub fn from (attachment: &Attachment) -> Option<Self> {
+        let id_date = attachment.metas.id
+            .replace("fichier_national_fantoir_situation_", "")
+            .replace("_zip", "");
+
+        Some(Self {
+            url: attachment.href.clone(),
+            date: parse_fantoir_date(&id_date)?,
+        })
+    }
+
+    /// Lists the names the extracted file is looked up under, in the form
+    /// `FANTOIR<MM><YY>`: first for the month before the production date,
+    /// then for the production month itself.
+    ///
+    /// Month and year are both zero-padded to two digits, so January 2023
+    /// gives `FANTOIR0123`, not `FANTOIR123`.
+    pub fn get_file_candidates(&self) -> Vec<String> {
+        let previous_month = self.date - Months::new(1);
+
+        [previous_month, self.date]
+            .iter()
+            .map(|date| format!(
+                "FANTOIR{:02}{:02}",
+                date.month(),
+                date.year().rem_euclid(100), // two-digit year, whatever the century
+            ))
+            .collect()
+    }
+
+    /// Checks whether one of the candidate file names exists as a regular file.
+    /// Candidates are relative paths, so they are resolved against the
+    /// current working directory.
+    pub fn exists_locally(&self) -> bool {
+        self.get_file_candidates()
+            .iter()
+            .any(|candidate| Path::new(candidate).is_file())
+    }
+}
+
+// The partial order is delegated to the total order defined by `Ord::cmp`,
+// so a comparison is always available and both traits give the same answer.
+impl PartialOrd for FantoirFile {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+// Files are ordered by production date, so the maximum is the most recent one.
+//
+// The URL is only a tie-breaker for files sharing the same date: equality
+// compares both fields, and `cmp` must return `Equal` only for values
+// that are equal.
+impl Ord for FantoirFile {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.date
+            .cmp(&other.date)
+            .then_with(|| self.url.cmp(&other.url))
+    }
+}
+
+/// Parses an identifier like `novembre_2022` into the first day of that month.
+///
+/// Returns `None` unless the identifier has exactly two `_`-separated parts,
+/// a French month name followed by a year, which form a valid date.
+fn parse_fantoir_date(id_date: &str) -> Option<NaiveDate> {
+    let mut fragments = id_date.split('_');
+
+    // Exactly two fragments are accepted: [ month in French, year ]
+    let (month_name, year) = match (fragments.next(), fragments.next(), fragments.next()) {
+        (Some(month_name), Some(year), None) => (month_name, year),
+        _ => return None,
+    };
+
+    let year = year.parse().ok()?;
+    let month = parse_french_month_long_name(month_name)?;
+
+    NaiveDate::from_ymd_opt(year, month, 1)
+}
+
+/// Gives the month number (1 to 12) matching a French month long name.
+///
+/// Names are expected in lowercase and without diacritics (`fevrier`, `aout`,
+/// `decembre`); anything else returns `None`.
+fn parse_french_month_long_name(month: &str) -> Option<u32> {
+    // Sorted in calendar order: the index of a name is its month number minus one.
+    const MONTHS: [&str; 12] = [
+        "janvier", "fevrier", "mars", "avril", "mai", "juin",
+        "juillet", "aout", "septembre", "octobre", "novembre", "decembre",
+    ];
+
+    MONTHS
+        .iter()
+        .position(|&name| name == month)
+        .map(|index| index as u32 + 1)
+}
+
+// Unit tests for the FANTOIR file metadata helpers
+#[cfg(test)]
+mod tests {
+ use chrono::NaiveDate;
+ use super::*;
+
+ // A "<month in French>_<year>" identifier is parsed as the first day of that month
+ #[test]
+ fn test_parse_fantoir_date() {
+ let expected = NaiveDate::from_ymd_opt(2022, 11, 1).unwrap();
+ assert_eq!(Some(expected), parse_fantoir_date("novembre_2022"));
+ }
+
+ // Candidates are built for the previous month first, then for the production month
+ #[test]
+ fn test_get_file_candidates() {
+ let file = FantoirFile {
+ url: "foo/fichier_national_fantoir_situation_novembre_2022_zip".to_string(),
+ date: NaiveDate::from_ymd_opt(2022, 11, 1).unwrap(),
+ };
+
+ let expected = vec!["FANTOIR1022".to_string(), "FANTOIR1122".to_string()];
+ assert_eq!(expected, file.get_file_candidates());
+ }
+}
diff --git a/fantoir-datasource/src/commands/fetch/mod.rs b/fantoir-datasource/src/commands/fetch/mod.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/mod.rs
@@ -0,0 +1,124 @@
+//! Fetch command for the fantoir-datasource tool.
+//!
+//! Check last version and download if needed
+
+use std::env;
+use std::path::PathBuf;
+use std::process::exit;
+
+use chrono::Utc;
+use tokio::fs::remove_file;
+use opendatasoft_explore_api::requests::ExploreApiEndPoint;
+use tokio::process::Command;
+
+use crate::commands::fetch::fantoir_file::FantoirFile;
+use crate::commands::fetch::os::is_command_available;
+use crate::services::http_client::Client as HttpClient;
+
+mod fantoir_file;
+mod os;
+
+/// Base URL of the Explore API queried for the dataset attachments
+static ENDPOINT: &str = "https://data.economie.gouv.fr/api/v2";
+
+/// Identifier of the dataset the FANTOIR files are attached to
+static DATASET_ID: &str = "fichier-fantoir-des-voies-et-lieux-dits";
+
+/// Downloads and extracts the most recent FANTOIR file.
+///
+/// On success, prints `FANTOIR_FILE=<extracted file>` on stdout.
+///
+/// This function always terminates the process. Exit codes:
+///   - 0: the file has been downloaded and extracted
+///   - 12: the file already exists locally and `overwrite` is false
+///   - 16: the archive can't be downloaded
+///   - 32: the `unzip` utility isn't available
+///   - any other value: the code reported by the extraction step
+pub async fn fetch (overwrite: bool) {
+    let fantoir_file = get_last_file_information().await;
+
+    if fantoir_file.exists_locally() && !overwrite {
+        eprintln!("FANTOIR file already exists. Run with --overwrite to overwrite it.");
+        exit(12);
+    }
+
+    if !is_command_available("unzip") {
+        eprintln!("No 'unzip' utility has been found, please install it or fix PATH if needed.");
+        exit(32);
+    }
+
+    let target_path = get_fantoir_zip_path();
+    if let Err(error) = HttpClient::new(None).download(&fantoir_file.url, &target_path).await {
+        eprintln!("Can't download FANTOIR file: {:?}", error);
+
+        // Best effort: don't leave a partial archive behind.
+        // The file may not exist at all, so a failure here is ignored.
+        let _ = remove_file(&target_path).await;
+
+        exit(16);
+    }
+
+    let exit_code = match unzip(&target_path, overwrite).await {
+        Ok(path) => {
+            println!("FANTOIR_FILE={}", &path);
+
+            0
+        }
+        Err(exit_code) => exit_code,
+    };
+
+    if let Err(error) = remove_file(&target_path).await {
+        eprintln!("Can't remove downloaded temporary file: {}", error);
+        // display() never panics, even if the path isn't valid UTF-8
+        eprintln!("Please delete manually {}", target_path.display());
+    }
+
+    exit(exit_code);
+}
+
+/// Picks the path the FANTOIR ZIP archive is temporarily saved to:
+/// a file in the system temporary directory, named after the current UNIX timestamp.
+fn get_fantoir_zip_path() -> PathBuf {
+    let mut path = env::temp_dir();
+    path.push(format!("fantoir-download-{}.zip", Utc::now().timestamp()));
+
+    path
+}
+
+/// Extracts the archive with the `unzip` utility.
+///
+/// Existing files are overwritten (`-o`) when `overwrite` is true,
+/// and never overwritten (`-n`) otherwise.
+///
+/// Returns the name of the extracted file, or an exit code to report:
+///   - the exit code of unzip when it fails
+///   - 1 when unzip terminated without an exit code (e.g. killed by a signal)
+///   - 127 when unzip can't be spawned, or when no extracted file is found in its output
+async fn unzip(archive_path: &PathBuf, overwrite: bool) -> Result<String, i32> {
+    let overwrite_option = if overwrite { "-o" } else { "-n" };
+
+    let process = Command::new("unzip")
+        .arg(overwrite_option)
+        .arg(archive_path) // passed as an OS string: the path doesn't need to be valid UTF-8
+        .output()
+        .await
+        .map_err(|error| {
+            eprintln!("Can't spawn unzip process: {}", error);
+
+            127
+        })?;
+
+    if !process.status.success() {
+        // No exit code is available when the process has been terminated by a signal.
+        return Err(process.status.code().unwrap_or(1));
+    }
+
+    find_extracted_file(process.stdout).ok_or(127)
+}
+
+/// Finds the name of the extracted file in the unzip standard output.
+///
+/// Looks for the first `inflating: ` marker, then, if there is none, for the
+/// first `extracting: ` one. The file name is the rest of the line after
+/// the marker, with surrounding whitespace trimmed.
+///
+/// Returns `None` when the output contains neither marker.
+fn find_extracted_file(stdout: Vec<u8>) -> Option<String> {
+    // File names aren't guaranteed to be printed as valid UTF-8:
+    // invalid sequences are replaced instead of aborting.
+    let output = String::from_utf8_lossy(&stdout);
+
+    for action in ["inflating: ", "extracting: "] {
+        if let Some(pos) = output.find(action) {
+            let rest = &output[pos + action.len()..];
+
+            // lines() handles both \n and \r\n, and also a last line without
+            // any line terminator.
+            let filename = rest.lines().next().unwrap_or("").trim();
+
+            return Some(filename.to_string());
+        }
+    }
+
+    None
+}
+
+/// Queries the dataset attachments and returns the most recent FANTOIR file.
+///
+/// Only attachments whose title starts with "Fichier national FANTOIR" are considered.
+///
+/// # Panics
+///
+/// Panics when the metadata of a matching attachment can't be parsed,
+/// or when no attachment matches.
+pub async fn get_last_file_information () -> FantoirFile {
+    let endpoint = ExploreApiEndPoint::new(ENDPOINT);
+    let result = endpoint.get_dataset_attachments(DATASET_ID).await;
+
+    result
+        .attachments
+        .into_iter()
+        .filter(|attachment| attachment.metas.title.starts_with("Fichier national FANTOIR"))
+        .map(|attachment| FantoirFile::from(&attachment).expect("Can't parse FANTOIR file metadata"))
+        .max() // The most recent
+        .expect("No FANTOIR file found in the dataset attachments")
+}
diff --git a/fantoir-datasource/src/commands/fetch/os.rs b/fantoir-datasource/src/commands/fetch/os.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/commands/fetch/os.rs
@@ -0,0 +1,31 @@
+//! OS-related helper methods
+
+use std::env::consts::OS;
+use std::process::{Command, Stdio};
+
+/// Determines if a command can be found, by asking `where` on Windows
+/// and `which` on other operating systems.
+///
+/// Returns false when the command isn't found, and also when the lookup
+/// utility itself can't be executed, as availability can't be confirmed then.
+pub fn is_command_available (command: &str) -> bool {
+    let command_to_use = match OS {
+        "windows" => "where",
+        _ => "which", // command -v is sometimes recommended, but doesn't exist as standalone
+    };
+
+    // Use the exit code to determine if the command has been found
+    Command::new(command_to_use)
+        .arg(command)
+        .stdout(Stdio::null()) // Discard both stdout and stderr
+        .stderr(Stdio::null())
+        .status()
+        .map(|status| status.success())
+        .unwrap_or(false)
+}
+
+// Unit tests for the OS helpers
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // NOTE(review): the first assertion depends on the environment, as it only
+ // passes on machines where `unzip` is installed and can be found.
+ #[test]
+ pub fn test_is_command_available () {
+ assert!(is_command_available("unzip"));
+ assert!(!is_command_available("notexisting"));
+ }
+}
diff --git a/fantoir-datasource/src/commands/mod.rs b/fantoir-datasource/src/commands/mod.rs
--- a/fantoir-datasource/src/commands/mod.rs
+++ b/fantoir-datasource/src/commands/mod.rs
@@ -1,5 +1,6 @@
//! Commands for the fantoir-datasource tool.
+pub(crate) mod fetch;
pub(crate) mod import;
pub(crate) mod promote;
pub(crate) mod query;
diff --git a/fantoir-datasource/src/fantoir.rs b/fantoir-datasource/src/fantoir.rs
--- a/fantoir-datasource/src/fantoir.rs
+++ b/fantoir-datasource/src/fantoir.rs
@@ -3,9 +3,9 @@
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
+use chrono::NaiveDate;
use lazy_static::lazy_static;
use sqlx::PgPool;
-use sqlx::types::chrono::NaiveDate;
lazy_static! {
static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
diff --git a/fantoir-datasource/src/main.rs b/fantoir-datasource/src/main.rs
--- a/fantoir-datasource/src/main.rs
+++ b/fantoir-datasource/src/main.rs
@@ -13,6 +13,9 @@
#[command(name = "fantoir-datasource")]
#[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)]
enum FantoirCommand {
+ /// Fetch the last version of the FANTOIR file
+ Fetch(FetchArgs),
+
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
@@ -28,6 +31,13 @@
Query(QueryArgs)
}
+// Arguments of the fetch subcommand.
+// NOTE(review): kept as a plain comment on purpose, a `///` doc comment on this
+// struct could be picked up by clap as help text. Confirm before converting.
+#[derive(Debug, Args)]
+pub struct FetchArgs {
+ /// Overwrite file if already existing
+ #[arg(long)]
+ overwrite: bool,
+}
+
#[derive(Debug, Args)]
pub struct ImportArgs {
/// Create table if it doesn't exist
@@ -91,6 +101,9 @@
.expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
match command {
+ FantoirCommand::Fetch(args) => {
+ commands::fetch::fetch(args.overwrite).await;
+ },
FantoirCommand::Import(args) => {
commands::import::import(&args, &database_url).await;
},
diff --git a/fantoir-datasource/src/services/http_client.rs b/fantoir-datasource/src/services/http_client.rs
--- a/fantoir-datasource/src/services/http_client.rs
+++ b/fantoir-datasource/src/services/http_client.rs
@@ -1,7 +1,15 @@
-use lazy_static::lazy_static;
+use std::io::Error as IOError;
+use std::path::Path;
-use reqwest::{Client as ReqwestClient, ClientBuilder, Error, IntoUrl, Response};
+use lazy_static::lazy_static;
+use reqwest::Client as ReqwestClient;
+use reqwest::ClientBuilder;
+use reqwest::Error as ReqwestError;
+use reqwest::IntoUrl;
+use reqwest::Response;
use reqwest::header::HeaderMap;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
/* -------------------------------------------------------------
User agent
@@ -52,5 +60,41 @@
.get(url)
.send()
.await
+ .map_err(|error| Error::Reqwest(error))
}
+
+ /// Downloads the content available at `url` and saves it to `target_path`.
+ ///
+ /// The response body is written chunk by chunk, so it's never held
+ /// entirely in memory. Returns the number of bytes written.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Error::Reqwest` when the request fails, when the server
+ /// answers with an HTTP error status (4xx or 5xx) or when the body can't
+ /// be read, and `Error::IO` when the file can't be created or written.
+ pub async fn download<P, T>(&self, url: T, target_path: P) -> Result<usize, Error>
+ where T: IntoUrl, P: AsRef<Path> {
+     // Send the request before creating the file: a failed request
+     // doesn't leave an empty file behind, and an error page is
+     // never saved as if it were the expected content.
+     let mut target_content = self.get(url)
+         .await?
+         .error_for_status()
+         .map_err(Error::Reqwest)?;
+
+     let mut file = File::create(target_path)
+         .await
+         .map_err(Error::IO)?;
+
+     let mut bytes_read = 0;
+     while let Some(chunk) = target_content
+         .chunk()
+         .await
+         .map_err(Error::Reqwest)?
+     {
+         // write() is allowed to write only a part of the buffer,
+         // write_all() loops until the whole chunk has been written.
+         file.write_all(chunk.as_ref())
+             .await
+             .map_err(Error::IO)?;
+
+         bytes_read += chunk.len();
+     }
+
+     // Ensure pending writes are completed before the file is dropped,
+     // as the caller can read it straight away.
+     file.flush()
+         .await
+         .map_err(Error::IO)?;
+
+     Ok(bytes_read)
+ }
+}
+
+/* -------------------------------------------------------------
+ HTTP client error
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// HTTP client error
+///
+/// Wraps the errors which can occur when sending a request or when saving
+/// a response to a file. The underlying error is exposed through `source()`.
+#[derive(Debug)]
+pub enum Error {
+    /// Represents an underlying error from Reqwest HTTP client when processing a request.
+    Reqwest(ReqwestError),
+
+    /// Represents an IO error when doing file operations.
+    IO(IOError),
+}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Error::Reqwest(error) => write!(f, "HTTP request error: {}", error),
+            Error::IO(error) => write!(f, "I/O error: {}", error),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Error::Reqwest(error) => Some(error),
+            Error::IO(error) => Some(error),
+        }
+    }
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Nov 19, 12:14 (21 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2252472
Default Alt Text
D2738.id6979.diff (13 KB)
Attached To
Mode
D2738: Fetch last FANTOIR file
Attached
Detach File
Event Timeline
Log In to Comment