Page MenuHomeDevCentral

D2729.id6918.diff
No OneTemporary

D2729.id6918.diff

diff --git a/.arcconfig b/.arcconfig
new file mode 100644
--- /dev/null
+++ b/.arcconfig
@@ -0,0 +1,4 @@
+{
+ "phabricator.uri": "https://devcentral.nasqueron.org",
+ "repository.callsign": "FANTDB"
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+/FANTOIR*
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "fantoir2db"
+version = "0.1.0"
+edition = "2021"
+description = "Generates a Postgres table from FANTOIR raw file"
+authors = [
+ "Sébastien Santoro <dereckson@espace-win.org>"
+]
+license = "BSD-2-Clause"
+
+[dependencies]
+
+[dependencies.clap]
+version = "~4.0.32"
+features = ["derive"]
+
+[dependencies.sqlx]
+version = "~0.6.2"
+features = ["runtime-tokio-native-tls", "postgres", "chrono"]
+
+[dependencies.tokio]
+version = "~1.23.0"
+features = ["full"]
diff --git a/README.md b/README.md
new file mode 100644
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+## How to use?
+
+Define your PostgreSQL connection URL in environment:
+
+```
+export DATABASE_URL="postgres://fantoir:fantoir@localhost/fantoir"
+```
+
+## Development
+
+### Build instructions
+
+The PostgreSQL library is required to link against it.
+If not found, you can add the path to the LIB environment variable.
+
+### Prepare a test database
+
+Execute the following queries as postgres user:
+
+```
+CREATE ROLE fantoir WITH PASSWORD 'fantoir' LOGIN;
+CREATE DATABASE fantoir OWNER fantoir;
+```
+
+Connected as your database role, enable the pg_trgm extension
+to be able to generate the index for full-text search with trigrams:
+
+```
+CREATE EXTENSION pg_trgm;
+```
+
+If the extension doesn't exist, it can be included in a package
+named for example `postgresql-contrib`.
+
+You can then use the code with the default DATABASE_URL documented above.
+
+### Database pitfalls
+
+The FANTOIR database uses INSEE department code, they can contain a letter,
+currently only for Corse (2A and 2B).
+That also applies when building the INSEE commune code.
+
+If a record is canceled, the cancel date can be omitted.
+The creation date can be omitted too.
+
+The last line of the FANTOIR database must be ignored.
+
+Wikidata uses the "code FANTOIR", matching the "code RIVOLI"
+documented in FANTOIR file description. This code matches the
+11 first characters of a record.
+See also https://www.wikidata.org/wiki/Property:P3182.
diff --git a/src/commands/import.rs b/src/commands/import.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/import.rs
@@ -0,0 +1,84 @@
+//! Import command for the fantoir2db tool.
+//!
+//! Import from FANTOIR file generated by the DGFIP
+
+use std::process::exit;
+
+use sqlx::PgPool;
+use tokio::fs::File;
+use tokio::io::{AsyncBufReadExt, BufReader};
+
+use crate::ImportArgs;
+use crate::db::*;
+use crate::fantoir::FantoirEntry;
+
+pub async fn import(args: &ImportArgs, database_url: &str) {
+ let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ if let Err(error) = initialize_table(args, &pool).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Currently, async closures are unstable, see https://github.com/rust-lang/rust/issues/62290
+ // They are also largely unimplemented. As such, this code doesn't follow HOF pattern.
+ let mut buffer = BufReader::new(fd).lines();
+ while let Ok(line) = buffer.next_line().await {
+ if line.is_none() {
+ break;
+ }
+ let line = line.unwrap();
+
+ if line.len() < 90 {
+ // This record is the header or describes a department or a commune
+ continue;
+ }
+
+ if line.starts_with("9999999999") {
+ // This record is the last of the database
+ break;
+ }
+
+ FantoirEntry::parse_line(&line)
+ .insert_to_db(&pool, &args.fantoir_table)
+ .await
+ }
+}
+
+async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
+ if is_table_exists(pool, &args.fantoir_table).await {
+ if is_table_empty(&pool, &args.fantoir_table).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.fantoir_table).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.fantoir_table
+ ));
+ }
+
+ if args.create_table {
+ create_table(&pool, &args.fantoir_table).await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.fantoir_table
+ ))
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/mod.rs
@@ -0,0 +1,3 @@
+//! Commands for the fantoir2db tool.
+
+pub(crate) mod import;
diff --git a/src/db.rs b/src/db.rs
new file mode 100644
--- /dev/null
+++ b/src/db.rs
@@ -0,0 +1,71 @@
+//! # Utilities for database.
+//!
+//! This module provides helpers to interact with a PostgreSQL database.
+//! Functions expect to work with an executor from sqlx crate.
+
+use sqlx::PgPool;
+use sqlx::postgres::PgPoolOptions;
+
+static QUERIES_SEPARATOR: &str = "\n\n\n";
+
+pub async fn connect_to_db (database_url: &str) -> PgPool {
+ PgPoolOptions::new()
+ .max_connections(3)
+ .connect(database_url)
+ .await
+ .expect("Can't connect to database.")
+}
+
+pub async fn is_table_exists (pool: &PgPool, table: &str) -> bool {
+ let query = r#"
+ SELECT EXISTS (
+ SELECT FROM
+ pg_tables
+ WHERE
+ schemaname = 'public' AND
+ tablename = $1
+ );
+ "#;
+
+ let result: (bool,) = sqlx::query_as(query)
+ .bind(table)
+ .fetch_one(pool)
+ .await
+ .expect("Can't check if table exists.");
+
+ result.0
+}
+
+pub async fn is_table_empty (pool: &PgPool, table: &str) -> bool {
+ let query = r#"
+ SELECT EXISTS (
+ SELECT 1 FROM %%table%%
+ );
+ "#.replace("%%table%%", table);
+
+ let result: (bool,) = sqlx::query_as(&query)
+ .fetch_one(pool)
+ .await
+ .expect("Can't check if table is empty.");
+
+ !result.0
+}
+
+pub async fn truncate_table (pool: &PgPool, table: &str) {
+ let query = format!("TRUNCATE TABLE {} RESTART IDENTITY;", table);
+
+ sqlx::query(&query)
+ .bind(table)
+ .execute(pool)
+ .await
+ .expect("Can't truncate table.");
+}
+
+pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
+ for query in queries.split(QUERIES_SEPARATOR) {
+ sqlx::query(&query)
+ .execute(pool)
+ .await
+ .expect("Can't run SQL query.");
+ }
+}
diff --git a/src/fantoir.rs b/src/fantoir.rs
new file mode 100644
--- /dev/null
+++ b/src/fantoir.rs
@@ -0,0 +1,192 @@
+//! # Helper methods for FANTOIR database.
+//!
+//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
+//! Database functions expect to work with an executor from sqlx crate.
+
+use sqlx::PgPool;
+use sqlx::types::chrono::NaiveDate;
+
+/// A voie in the FANTOIR database
+#[derive(Debug)]
+pub struct FantoirEntry {
+ /* Identifiers */
+ code_fantoir: String,
+
+ /* Part 1 - commune */
+ departement: String, // Generally an integer, but INSEE uses 2A and 2B for Corse
+ code_commune: i32,
+ code_insee: String, // Afa in Corse has 2A001
+ type_commune: Option<String>,
+ is_pseudo_recensee: bool,
+
+ /* Part 2 - voie */
+ identifiant_communal_voie: String,
+ cle_rivoli: String,
+ code_nature_voie: Option<String>,
+ libelle_voie: String,
+ type_voie: i32, // 1: voie, 2: ens. immo, 3: lieu-dit, 4: pseudo-voie, 5: provisoire
+ is_public: bool,
+
+ /* Part 3 - population */
+ is_large: bool,
+ population_a_part: i32,
+ population_fictive: i32,
+
+ /* Part 4 - metadata */
+ is_cancelled: bool,
+ cancel_date: Option<NaiveDate>,
+ creation_date: Option<NaiveDate>,
+ code_majic: i32,
+ last_alpha_word: String,
+}
+
+impl FantoirEntry {
+ pub fn parse_line(line: &str) -> Self {
+ let departement = match &line[0..2] {
+ "97" => String::from(&line[0..3]), // include for DOM/TOM the next digit
+ department => String::from(department),
+ };
+ let len = line.len();
+
+ Self {
+ /* Identifier */
+ code_fantoir: String::from(&line[0..11]),
+
+ /* Part 1 - commune */
+ departement,
+ code_commune: line[3..6].parse().expect("Can't parse code commune"),
+ code_insee: format!("{:02}{:03}", &line[0..2], &line[3..6]),
+ type_commune: parse_optional_string(&line[43..44]),
+ is_pseudo_recensee: &line[45..46] == "3",
+
+ /* Part 2 - voie */
+ identifiant_communal_voie: String::from(&line[6..10]),
+ cle_rivoli: String::from(&line[10..11]),
+ code_nature_voie: parse_optional_string(&line[11..15]),
+ libelle_voie: String::from(line[15..41].trim()),
+ type_voie: line[108..109].parse().expect("Can't parse type de voie."),
+ is_public: &line[48..49] == "0",
+
+ /* Part 3 - population */
+ is_large: &line[49..50] == "*",
+ population_a_part: line[59..66].parse().expect("Can't parse population à part"),
+ population_fictive: line[66..73].parse().expect("Can't parse population fictive"),
+
+ /* Part 4 - metadata */
+ is_cancelled: &line[73..74] != " ",
+ cancel_date: parse_fantoir_date(&line[74..81]),
+ creation_date: parse_fantoir_date(&line[81..88]),
+ code_majic: line[103..108].parse().expect("Can't parse MAJIC"),
+ last_alpha_word: String::from(&line[112..len]),
+ }
+ }
+
+ pub async fn insert_to_db(&self, pool: &PgPool, table: &str) {
+ let mut query = format!("INSERT INTO {}", table);
+ query.push_str(
+ r#"
+ (code_fantoir,
+ departement, code_commune, code_insee, type_commune, is_pseudo_recensee,
+ identifiant_communal_voie, cle_rivoli, code_nature_voie, libelle_voie, type_voie, is_public,
+ is_large, population_a_part, population_fictive,
+ is_cancelled, cancel_date, creation_date, code_majic, last_alpha_word
+ )
+ VALUES
+ ($1,
+ $2, $3, $4, $5, $6,
+ $7, $8, $9, $10, $11, $12,
+ $13, $14, $15,
+ $16, $17, $18, $19, $20
+ )"#
+ );
+
+ sqlx::query(&query)
+ /* Identifiers */
+ .bind(&self.code_fantoir)
+
+ /* Part 1 - commune */
+ .bind(&self.departement)
+ .bind(&self.code_commune)
+ .bind(&self.code_insee)
+ .bind(&self.type_commune)
+ .bind(&self.is_pseudo_recensee)
+
+ /* Part 2 - Voie */
+ .bind(&self.identifiant_communal_voie)
+ .bind(&self.cle_rivoli)
+ .bind(&self.code_nature_voie)
+ .bind(&self.libelle_voie)
+ .bind(&self.type_voie)
+ .bind(&self.is_public)
+
+ /* Part 3 - Population */
+ .bind(&self.is_large)
+ .bind(&self.population_a_part)
+ .bind(&self.population_fictive)
+
+ /* Part 4 - Metadata */
+ .bind(&self.is_cancelled)
+ .bind(&self.cancel_date)
+ .bind(&self.creation_date)
+ .bind(&self.code_majic)
+ .bind(&self.last_alpha_word)
+
+ .execute(pool)
+ .await
+ .expect("Can't insert entry to database");
+ }
+}
+
+pub fn parse_fantoir_date (date: &str) -> Option<NaiveDate> {
+ if date == "0000000" {
+ return None;
+ }
+
+ let year = date[0..4].parse().expect("Can't parse date: year part");
+ let ord = date[4..7].parse().expect("Can't parse date: ordinal part");
+
+ NaiveDate::from_yo_opt(year, ord)
+}
+
+fn parse_optional_string (expression: &str) -> Option<String> {
+ let expression = expression.trim();
+
+ if expression.len() > 0 {
+ Some(String::from(expression))
+ } else {
+ None
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ // Note this useful idiom: importing names from outer (for mod tests) scope.
+ use super::*;
+
+ #[test]
+ fn test_parse_fantoir_date() {
+ let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
+ let actual = parse_fantoir_date("1987001");
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ fn test_parse_optional_string() {
+ assert_eq!(Some(String::from("quux")), parse_optional_string("quux"));
+ }
+
+ #[test]
+ fn test_parse_optional_string_with_trailing_spaces() {
+ assert_eq!(Some(String::from("quux")), parse_optional_string("quux "));
+ }
+
+ #[test]
+ fn test_parse_optional_string_when_empty() {
+ assert_eq!(true, parse_optional_string("").is_none());
+ }
+
+ #[test]
+ fn test_parse_optional_string_when_only_spaces() {
+ assert_eq!(true, parse_optional_string(" ").is_none());
+ }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,50 @@
+use std::env;
+
+use clap::{Args, Parser};
+
+use crate::commands::import::import;
+
+mod commands;
+mod db;
+mod fantoir;
+
+#[derive(Debug, Parser)]
+#[command(name = "fantoir2db")]
+#[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)]
+enum FantoirCommand {
+ /// Import from FANTOIR file generated by the DGFIP
+ #[command(arg_required_else_help = true)]
+ Import(ImportArgs),
+}
+
+#[derive(Debug, Args)]
+pub struct ImportArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+
+ /// The FANTOIR file to import
+ fantoir_file: String,
+
+ /// The name of the table to populate
+ fantoir_table: String,
+}
+
+#[tokio::main]
+async fn main() {
+ let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
+
+ let database_url = env::var("DATABASE_URL")
+ .expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
+
+ match command {
+ FantoirCommand::Import(args) => {
+ import(&args, &database_url).await;
+ },
+ };
+}
diff --git a/src/schema/fantoir.sql b/src/schema/fantoir.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/fantoir.sql
@@ -0,0 +1,54 @@
+-- The fantoir table uses French columns to easily identify them
+-- from the file description (specification), not translated to English.
+-- This table is only to use for France ways, not for other countries,
+-- as it's specifically tied up to the FANTOIR database.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir (
+ -- identifiers
+ id bigserial
+ constraint /*index*/index_fantoir_pk
+ primary key,
+ code_fantoir char(11) NOT NULL,
+
+ -- Part 1 - commune
+ departement varchar(3) NOT NULL,
+ code_commune integer NOT NULL,
+ code_insee char(5) NOT NULL,
+ type_commune varchar(1),
+ is_pseudo_recensee bool NOT NULL,
+
+ -- Part 2 - voie
+ identifiant_communal_voie varchar(4) NOT NULL,
+ cle_rivoli char(1) NOT NULL,
+ code_nature_voie varchar(4),
+ libelle_voie varchar(26) NOT NULL,
+ type_voie smallint NOT NULL,
+ is_public bool NOT NULL,
+
+ -- Part 3 - population
+ is_large bool NOT NULL,
+ population_a_part integer NOT NULL,
+ population_fictive integer NOT NULL,
+
+ -- Part 4 - metadata
+ is_cancelled bool NOT NULL,
+ cancel_date DATE,
+ creation_date DATE,
+ code_majic integer NOT NULL,
+ last_alpha_word varchar(8) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir),
+ UNIQUE (code_insee, identifiant_communal_voie)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_voie_trigram
+ ON /*table*/fantoir
+ USING gin (libelle_voie gin_trgm_ops);

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 05:58 (4 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2306141
Default Alt Text
D2729.id6918.diff (16 KB)

Event Timeline