Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3769126
D2729.id6924.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Referenced Files
None
Subscribers
None
D2729.id6924.diff
View Options
Index: .arcconfig
===================================================================
--- /dev/null
+++ .arcconfig
@@ -0,0 +1,4 @@
+{
+ "phabricator.uri": "https://devcentral.nasqueron.org",
+ "repository.callsign": "FANTDB"
+}
Index: .gitignore
===================================================================
--- /dev/null
+++ .gitignore
@@ -0,0 +1,3 @@
+/target
+/FANTOIR*
+Cargo.lock
Index: Cargo.toml
===================================================================
--- /dev/null
+++ Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "fantoir-datasource"
+version = "0.1.0"
+edition = "2021"
+description = "Generates a Postgres table from FANTOIR raw file"
+authors = [
+ "Sébastien Santoro <dereckson@espace-win.org>"
+]
+license = "BSD-2-Clause"
+
+[dependencies]
+
+[dependencies.clap]
+version = "~4.0.32"
+features = ["derive"]
+
+[dependencies.sqlx]
+version = "~0.6.2"
+features = ["runtime-tokio-native-tls", "postgres", "chrono"]
+
+[dependencies.tokio]
+version = "~1.23.0"
+features = ["full"]
Index: README.md
===================================================================
--- /dev/null
+++ README.md
@@ -0,0 +1,51 @@
+## How to use?
+
+Define your PostgreSQL connection URL in environment:
+
+```
+export DATABASE_URL="postgres://fantoir:fantoir@localhost/fantoir"
+```
+
+## Development
+
+### Build instructions
+
+The PostgreSQL library is required to link against it.
+If not found, you can add the path to the LIB environment variable.
+
+### Prepare a test database
+
+Execute the following queries as postgres user:
+
+```
+CREATE ROLE fantoir WITH PASSWORD 'fantoir' LOGIN;
+CREATE DATABASE fantoir OWNER fantoir;
+```
+
+Connected as your database role, enable the pg_trgm extension
+to be able to generate the index for full-text search with trigrams:
+
+```
+CREATE EXTENSION pg_trgm;
+```
+
+If the extension doesn't exist, it can be included in a package
+named for example `postgresql-contrib`.
+
+You can then use the code with the default DATABASE_URL documented above.
+
+### Database pitfalls
+
+The FANTOIR database uses INSEE department code, they can contain a letter,
+currently only for Corse (2A and 2B).
+That also applies when building the INSEE commune code.
+
+If a record is canceled, the cancel date can be omitted.
+The creation date can be omitted too.
+
+The last line of the FANTOIR database must be ignored.
+
+Wikidata uses the "code FANTOIR", matching the "code RIVOLI"
+documented in FANTOIR file description. This code matches the
+11 first characters of a record.
+See also https://www.wikidata.org/wiki/Property:P3182.
Index: src/commands/import.rs
===================================================================
--- /dev/null
+++ src/commands/import.rs
@@ -0,0 +1,84 @@
+//! Import command for the fantoir-datasource tool.
+//!
+//! Import from FANTOIR file generated by the DGFIP
+
+use std::process::exit;
+
+use sqlx::PgPool;
+use tokio::fs::File;
+use tokio::io::{AsyncBufReadExt, BufReader};
+
+use crate::ImportArgs;
+use crate::db::*;
+use crate::fantoir::FantoirEntry;
+
+pub async fn import(args: &ImportArgs, database_url: &str) {
+ let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ if let Err(error) = initialize_table(args, &pool).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Currently, async closures are unstable, see https://github.com/rust-lang/rust/issues/62290
+ // They are also largely unimplemented. As such, this code doesn't follow HOF pattern.
+ let mut buffer = BufReader::new(fd).lines();
+ while let Ok(line) = buffer.next_line().await {
+ if line.is_none() {
+ break;
+ }
+ let line = line.unwrap();
+
+ if line.len() < 90 {
+ // This record is the header or describes a department or a commune
+ continue;
+ }
+
+ if line.starts_with("9999999999") {
+ // This record is the last of the database
+ break;
+ }
+
+ FantoirEntry::parse_line(&line)
+ .insert_to_db(&pool, &args.fantoir_table)
+ .await
+ }
+}
+
+async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
+ if is_table_exists(pool, &args.fantoir_table).await {
+ if is_table_empty(&pool, &args.fantoir_table).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.fantoir_table).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.fantoir_table
+ ));
+ }
+
+ if args.create_table {
+ create_table(&pool, &args.fantoir_table).await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.fantoir_table
+ ))
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
Index: src/commands/mod.rs
===================================================================
--- /dev/null
+++ src/commands/mod.rs
@@ -0,0 +1,3 @@
+//! Commands for the fantoir-datasource tool.
+
+pub(crate) mod import;
Index: src/db.rs
===================================================================
--- /dev/null
+++ src/db.rs
@@ -0,0 +1,71 @@
+//! # Utilities for database.
+//!
+//! This module provides helpers to interact with a PostgreSQL database.
+//! Functions expect to work with an executor from sqlx crate.
+
+use sqlx::PgPool;
+use sqlx::postgres::PgPoolOptions;
+
+static QUERIES_SEPARATOR: &str = "\n\n\n";
+
+pub async fn connect_to_db (database_url: &str) -> PgPool {
+ PgPoolOptions::new()
+ .max_connections(3)
+ .connect(database_url)
+ .await
+ .expect("Can't connect to database.")
+}
+
+pub async fn is_table_exists (pool: &PgPool, table: &str) -> bool {
+ let query = r#"
+ SELECT EXISTS (
+ SELECT FROM
+ pg_tables
+ WHERE
+ schemaname = 'public' AND
+ tablename = $1
+ );
+ "#;
+
+ let result: (bool,) = sqlx::query_as(query)
+ .bind(table)
+ .fetch_one(pool)
+ .await
+ .expect("Can't check if table exists.");
+
+ result.0
+}
+
+pub async fn is_table_empty (pool: &PgPool, table: &str) -> bool {
+ let query = r#"
+ SELECT EXISTS (
+ SELECT 1 FROM %%table%%
+ );
+ "#.replace("%%table%%", table);
+
+ let result: (bool,) = sqlx::query_as(&query)
+ .fetch_one(pool)
+ .await
+ .expect("Can't check if table is empty.");
+
+ !result.0
+}
+
+pub async fn truncate_table (pool: &PgPool, table: &str) {
+ let query = format!("TRUNCATE TABLE {} RESTART IDENTITY;", table);
+
+ sqlx::query(&query)
+ .bind(table)
+ .execute(pool)
+ .await
+ .expect("Can't truncate table.");
+}
+
+pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
+ for query in queries.split(QUERIES_SEPARATOR) {
+ sqlx::query(&query)
+ .execute(pool)
+ .await
+ .expect("Can't run SQL query.");
+ }
+}
Index: src/fantoir.rs
===================================================================
--- /dev/null
+++ src/fantoir.rs
@@ -0,0 +1,192 @@
+//! # Helper methods for FANTOIR database.
+//!
+//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
+//! Database functions expect to work with an executor from sqlx crate.
+
+use sqlx::PgPool;
+use sqlx::types::chrono::NaiveDate;
+
+/// A voie in the FANTOIR database
+#[derive(Debug)]
+pub struct FantoirEntry {
+ /* Identifiers */
+ code_fantoir: String,
+
+ /* Part 1 - commune */
+ departement: String, // Generally an integer, but INSEE uses 2A and 2B for Corse
+ code_commune: i32,
+ code_insee: String, // Afa in Corse has 2A001
+ type_commune: Option<String>,
+ is_pseudo_recensee: bool,
+
+ /* Part 2 - voie */
+ identifiant_communal_voie: String,
+ cle_rivoli: String,
+ code_nature_voie: Option<String>,
+ libelle_voie: String,
+ type_voie: i32, // 1: voie, 2: ens. immo, 3: lieu-dit, 4: pseudo-voie, 5: provisoire
+ is_public: bool,
+
+ /* Part 3 - population */
+ is_large: bool,
+ population_a_part: i32,
+ population_fictive: i32,
+
+ /* Part 4 - metadata */
+ is_cancelled: bool,
+ cancel_date: Option<NaiveDate>,
+ creation_date: Option<NaiveDate>,
+ code_majic: i32,
+ last_alpha_word: String,
+}
+
+impl FantoirEntry {
+ pub fn parse_line(line: &str) -> Self {
+ let departement = match &line[0..2] {
+ "97" => String::from(&line[0..3]), // include for DOM/TOM the next digit
+ department => String::from(department),
+ };
+ let len = line.len();
+
+ Self {
+ /* Identifier */
+ code_fantoir: String::from(&line[0..11]),
+
+ /* Part 1 - commune */
+ departement,
+ code_commune: line[3..6].parse().expect("Can't parse code commune"),
+ code_insee: format!("{:02}{:03}", &line[0..2], &line[3..6]),
+ type_commune: parse_optional_string(&line[43..44]),
+ is_pseudo_recensee: &line[45..46] == "3",
+
+ /* Part 2 - voie */
+ identifiant_communal_voie: String::from(&line[6..10]),
+ cle_rivoli: String::from(&line[10..11]),
+ code_nature_voie: parse_optional_string(&line[11..15]),
+ libelle_voie: String::from(line[15..41].trim()),
+ type_voie: line[108..109].parse().expect("Can't parse type de voie."),
+ is_public: &line[48..49] == "0",
+
+ /* Part 3 - population */
+ is_large: &line[49..50] == "*",
+ population_a_part: line[59..66].parse().expect("Can't parse population à part"),
+ population_fictive: line[66..73].parse().expect("Can't parse population fictive"),
+
+ /* Part 4 - metadata */
+ is_cancelled: &line[73..74] != " ",
+ cancel_date: parse_fantoir_date(&line[74..81]),
+ creation_date: parse_fantoir_date(&line[81..88]),
+ code_majic: line[103..108].parse().expect("Can't parse MAJIC"),
+ last_alpha_word: String::from(&line[112..len]),
+ }
+ }
+
+ pub async fn insert_to_db(&self, pool: &PgPool, table: &str) {
+ let mut query = format!("INSERT INTO {}", table);
+ query.push_str(
+ r#"
+ (code_fantoir,
+ departement, code_commune, code_insee, type_commune, is_pseudo_recensee,
+ identifiant_communal_voie, cle_rivoli, code_nature_voie, libelle_voie, type_voie, is_public,
+ is_large, population_a_part, population_fictive,
+ is_cancelled, cancel_date, creation_date, code_majic, last_alpha_word
+ )
+ VALUES
+ ($1,
+ $2, $3, $4, $5, $6,
+ $7, $8, $9, $10, $11, $12,
+ $13, $14, $15,
+ $16, $17, $18, $19, $20
+ )"#
+ );
+
+ sqlx::query(&query)
+ /* Identifiers */
+ .bind(&self.code_fantoir)
+
+ /* Part 1 - commune */
+ .bind(&self.departement)
+ .bind(&self.code_commune)
+ .bind(&self.code_insee)
+ .bind(&self.type_commune)
+ .bind(&self.is_pseudo_recensee)
+
+ /* Part 2 - Voie */
+ .bind(&self.identifiant_communal_voie)
+ .bind(&self.cle_rivoli)
+ .bind(&self.code_nature_voie)
+ .bind(&self.libelle_voie)
+ .bind(&self.type_voie)
+ .bind(&self.is_public)
+
+ /* Part 3 - Population */
+ .bind(&self.is_large)
+ .bind(&self.population_a_part)
+ .bind(&self.population_fictive)
+
+ /* Part 4 - Metadata */
+ .bind(&self.is_cancelled)
+ .bind(&self.cancel_date)
+ .bind(&self.creation_date)
+ .bind(&self.code_majic)
+ .bind(&self.last_alpha_word)
+
+ .execute(pool)
+ .await
+ .expect("Can't insert entry to database");
+ }
+}
+
+pub fn parse_fantoir_date (date: &str) -> Option<NaiveDate> {
+ if date == "0000000" {
+ return None;
+ }
+
+ let year = date[0..4].parse().expect("Can't parse date: year part");
+ let ord = date[4..7].parse().expect("Can't parse date: ordinal part");
+
+ NaiveDate::from_yo_opt(year, ord)
+}
+
+fn parse_optional_string (expression: &str) -> Option<String> {
+ let expression = expression.trim();
+
+ if expression.len() > 0 {
+ Some(String::from(expression))
+ } else {
+ None
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ // Note this useful idiom: importing names from outer (for mod tests) scope.
+ use super::*;
+
+ #[test]
+ fn test_parse_fantoir_date() {
+ let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
+ let actual = parse_fantoir_date("1987001");
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ fn test_parse_optional_string() {
+ assert_eq!(Some(String::from("quux")), parse_optional_string("quux"));
+ }
+
+ #[test]
+ fn test_parse_optional_string_with_trailing_spaces() {
+ assert_eq!(Some(String::from("quux")), parse_optional_string("quux "));
+ }
+
+ #[test]
+ fn test_parse_optional_string_when_empty() {
+ assert_eq!(true, parse_optional_string("").is_none());
+ }
+
+ #[test]
+ fn test_parse_optional_string_when_only_spaces() {
+ assert_eq!(true, parse_optional_string(" ").is_none());
+ }
+}
Index: src/main.rs
===================================================================
--- /dev/null
+++ src/main.rs
@@ -0,0 +1,50 @@
+use std::env;
+
+use clap::{Args, Parser};
+
+use crate::commands::import::import;
+
+mod commands;
+mod db;
+mod fantoir;
+
+#[derive(Debug, Parser)]
+#[command(name = "fantoir-datasource")]
+#[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)]
+enum FantoirCommand {
+ /// Import from FANTOIR file generated by the DGFIP
+ #[command(arg_required_else_help = true)]
+ Import(ImportArgs),
+}
+
+#[derive(Debug, Args)]
+pub struct ImportArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+
+ /// The FANTOIR file to import
+ fantoir_file: String,
+
+ /// The name of the table to populate
+ fantoir_table: String,
+}
+
+#[tokio::main]
+async fn main() {
+ let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
+
+ let database_url = env::var("DATABASE_URL")
+ .expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
+
+ match command {
+ FantoirCommand::Import(args) => {
+ import(&args, &database_url).await;
+ },
+ };
+}
Index: src/schema/fantoir.sql
===================================================================
--- /dev/null
+++ src/schema/fantoir.sql
@@ -0,0 +1,54 @@
+-- The fantoir table uses French columns to easily identify them
+-- from the file description (specification), not translated to English.
+-- This table is only to use for France ways, not for other countries,
+-- as it's specifically tied up to the FANTOIR database.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir (
+ -- identifiers
+ id bigserial
+ constraint /*index*/index_fantoir_pk
+ primary key,
+ code_fantoir char(11) NOT NULL,
+
+ -- Part 1 - commune
+ departement varchar(3) NOT NULL,
+ code_commune integer NOT NULL,
+ code_insee char(5) NOT NULL,
+ type_commune varchar(1),
+ is_pseudo_recensee bool NOT NULL,
+
+ -- Part 2 - voie
+ identifiant_communal_voie varchar(4) NOT NULL,
+ cle_rivoli char(1) NOT NULL,
+ code_nature_voie varchar(4),
+ libelle_voie varchar(26) NOT NULL,
+ type_voie smallint NOT NULL,
+ is_public bool NOT NULL,
+
+ -- Part 3 - population
+ is_large bool NOT NULL,
+ population_a_part integer NOT NULL,
+ population_fictive integer NOT NULL,
+
+ -- Part 4 - metadata
+ is_cancelled bool NOT NULL,
+ cancel_date DATE,
+ creation_date DATE,
+ code_majic integer NOT NULL,
+ last_alpha_word varchar(8) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir),
+ UNIQUE (code_insee, identifiant_communal_voie)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_voie_trigram
+ ON /*table*/fantoir
+ USING gin (libelle_voie gin_trgm_ops);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Nov 24, 13:07 (10 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2260211
Default Alt Text
D2729.id6924.diff (17 KB)
Attached To
Mode
D2729: Import FANTOIR file into PostgreSQL db
Attached
Detach File
Event Timeline
Log In to Comment