Page MenuHomeDevCentral

No OneTemporary

diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
index 64da1e4..c8b9d2d 100644
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -1,36 +1,37 @@
[package]
name = "fantoir-datasource"
version = "0.1.0"
edition = "2021"
description = "Generates a Postgres table from FANTOIR raw file"
authors = [
"Sébastien Santoro <dereckson@espace-win.org>"
]
license = "BSD-2-Clause"
[dependencies]
chrono = "~0.4.23"
lazy_static = "~1.4.0"
opendatasoft-explore-api = { version = "0.1.0", path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.1"
+regex = "~1.7.1"
sparesults = "~0.1.3"
[dependencies.async-scoped]
version = "~0.7.1"
features = ["use-tokio"]
[dependencies.clap]
version = "~4.0.32"
features = ["derive"]
[dependencies.reqwest]
version = "~0.11.13"
features = ["gzip", "deflate"]
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
[dependencies.tokio]
version = "~1.23.0"
features = ["full"]
diff --git a/fantoir-datasource/src/commands/query.rs b/fantoir-datasource/src/commands/query.rs
index 05e141e..2f47b7d 100644
--- a/fantoir-datasource/src/commands/query.rs
+++ b/fantoir-datasource/src/commands/query.rs
@@ -1,76 +1,90 @@
use std::process::exit;
use sqlx::PgPool;
use crate::db::connect_to_db;
+use crate::fantoir::looks_like_canonical_fantoir_code;
use crate::QueryArgs;
use crate::services::query::*;
static EXIT_CODE_NO_RESULT_FOUND: i32 = 4;
pub async fn search(args: QueryArgs, database_url: &str) {
let pool = connect_to_db(database_url).await;
if args.code_insee.is_some() && args.code_voie.is_some() {
let code_fantoir = search_fantoir_code(
&pool,
&args.code_insee.unwrap(),
&args.code_voie.unwrap(),
).await;
if let Some(code) = code_fantoir {
search_one_row(&pool, &code).await;
return;
}
exit(EXIT_CODE_NO_RESULT_FOUND);
}
- if args.libelle.len() > 0 {
+ if args.expression.len() > 0 {
+ if let Some(code) = pick_fantoir_code_from_args(&args.expression) {
+ search_one_row(&pool, &code).await;
+ return;
+ }
+
search_libelle(&pool, args).await;
return;
}
unimplemented!()
}
/// Looks up a single FANTOIR code and prints the matching row on stdout.
///
/// If `query_fantoir_code` yields nothing, the process terminates with
/// `EXIT_CODE_NO_RESULT_FOUND` instead of returning to the caller.
async fn search_one_row(pool: &PgPool, code_fantoir: &str) {
    if let Some(row) = query_fantoir_code(pool, code_fantoir).await {
        println!("{}", row);
    } else {
        exit(EXIT_CODE_NO_RESULT_FOUND);
    }
}
async fn search_libelle(pool: &PgPool, args: QueryArgs) {
- let expression = args.libelle.join(" ");
+ let expression = args.expression.join(" ");
let mut found = false;
query_libelle(pool, &expression)
.await
.iter()
.filter(|&entry| entry_matches_conditions(entry, &args))
.for_each(|entry| {
found = true;
println!("{}", entry);
});
if !found {
exit(EXIT_CODE_NO_RESULT_FOUND);
}
}
/// Tells whether a search result satisfies the optional filters of the query.
///
/// The only filter checked here is `code_insee`: when it is provided, the
/// entry must carry the same INSEE code; when it is absent, every entry matches.
fn entry_matches_conditions(entry: &FantoirVoieResult, conditions: &QueryArgs) -> bool {
    match &conditions.code_insee {
        Some(code_insee) => &entry.code_insee == code_insee,
        None => true,
    }
}
+
+fn pick_fantoir_code_from_args (expressions: &Vec<String>) -> Option<String> {
+ if expressions.len() == 1 && looks_like_canonical_fantoir_code(&expressions[0]) {
+ Some(expressions[0].clone())
+ } else {
+ None
+ }
+}
diff --git a/fantoir-datasource/src/fantoir.rs b/fantoir-datasource/src/fantoir.rs
index daed379..224a0e3 100644
--- a/fantoir-datasource/src/fantoir.rs
+++ b/fantoir-datasource/src/fantoir.rs
@@ -1,332 +1,358 @@
//! # Helper methods for FANTOIR database.
//!
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
use chrono::NaiveDate;
use lazy_static::lazy_static;
+use regex::Regex;
use sqlx::PgPool;
lazy_static! {
static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
/// The alphabet without I O and Q.
static ref RIVOLI_STRING: Vec<char> = vec![
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
];
+
+ static ref RE_FANTOIR: Regex = Regex::new(
+ "^[0-9][0-9A-Z][0-9][0-9][0-9][0-9][0-9A-Z][0-9][0-9][0-9][A-Z]$"
+ ).unwrap();
}
/// A voie in the FANTOIR database
///
/// One value is built per record line by `FantoirEntry::parse_line` and written
/// to the database by `FantoirEntry::insert_to_db`. Field order follows the
/// column order of that INSERT statement.
#[derive(Debug)]
pub struct FantoirEntry {
/* Identifiers */
// Bytes 0..11 of the record line: the full code, Rivoli key included.
code_fantoir: String,
/* Part 1 - commune */
// Two first bytes of the line, or three when the line starts with "97".
departement: String, // Generally an integer, but INSEE uses 2A and 2B for Corse
// Bytes 3..6 of the line, parsed as an integer.
code_commune: i32,
// Bytes 0..2 followed by bytes 3..6 of the line (byte 2 is skipped).
code_insee: String, // Afa in Corse has 2A001
// Byte 43 of the line; None when blank.
type_commune: Option<String>,
// True when byte 45 of the line is "3".
is_pseudo_recensee: bool,
/* Part 2 - voie */
// Bytes 6..10 of the line.
identifiant_communal_voie: String,
// Byte 10 of the line.
cle_rivoli: String,
// Bytes 11..15 of the line; None when blank.
code_nature_voie: Option<String>,
// Bytes 15..41 of the line, trimmed.
libelle_voie: String,
type_voie: i32, // 1: voie, 2: ens. immo, 3: lieu-dit, 4: pseudo-voie, 5: provisoire
// True when byte 48 of the line is "0".
is_public: bool,
/* Part 3 - population */
// True when byte 49 of the line is "*".
is_large: bool,
population_a_part: i32,
population_fictive: i32,
/* Part 4 - metadata */
// True when byte 73 of the line is anything but a space.
is_cancelled: bool,
// Both dates go through parse_fantoir_date, which maps "0000000" to None.
cancel_date: Option<NaiveDate>,
creation_date: Option<NaiveDate>,
code_majic: i32,
// Everything from byte 112 to the end of the line, not trimmed.
last_alpha_word: String,
}
impl FantoirEntry {
/// Builds an entry from one fixed-width record line of the FANTOIR file.
///
/// Fields are read at fixed byte offsets of `line`.
///
/// # Panics
///
/// Panics when `line` is shorter than 112 bytes, when an offset does not fall
/// on a character boundary, or when one of the numeric fields (code commune,
/// type de voie, populations, MAJIC code, date parts) can't be parsed.
// NOTE(review): offsets presumably assume an ASCII record, so bytes and columns coincide - confirm.
pub fn parse_line(line: &str) -> Self {
let departement = match &line[0..2] {
"97" => String::from(&line[0..3]), // include for DOM/TOM the next digit
department => String::from(department),
};
let len = line.len();
Self {
/* Identifier */
code_fantoir: String::from(&line[0..11]),
/* Part 1 - commune */
departement,
code_commune: line[3..6].parse().expect("Can't parse code commune"),
// Byte 2 is skipped: the INSEE code is the 2-byte prefix + the 3-byte commune code.
code_insee: format!("{:02}{:03}", &line[0..2], &line[3..6]),
type_commune: parse_optional_string(&line[43..44]),
is_pseudo_recensee: &line[45..46] == "3",
/* Part 2 - voie */
identifiant_communal_voie: String::from(&line[6..10]),
cle_rivoli: String::from(&line[10..11]),
code_nature_voie: parse_optional_string(&line[11..15]),
libelle_voie: String::from(line[15..41].trim()),
type_voie: line[108..109].parse().expect("Can't parse type de voie."),
is_public: &line[48..49] == "0",
/* Part 3 - population */
is_large: &line[49..50] == "*",
population_a_part: line[59..66].parse().expect("Can't parse population à part"),
population_fictive: line[66..73].parse().expect("Can't parse population fictive"),
/* Part 4 - metadata */
is_cancelled: &line[73..74] != " ",
cancel_date: parse_fantoir_date(&line[74..81]),
creation_date: parse_fantoir_date(&line[81..88]),
code_majic: line[103..108].parse().expect("Can't parse MAJIC"),
// Takes the remainder of the line as is, without trimming.
last_alpha_word: String::from(&line[112..len]),
}
}
/// Inserts this entry as one row of `table`.
///
/// The 20 values are bound in the same order as the column list of the
/// statement, so both lists must be kept in sync.
///
/// # Panics
///
/// Panics with "Can't insert entry to database" when the query fails.
pub async fn insert_to_db(&self, pool: &PgPool, table: &str) {
// The table name is interpolated as is into the SQL text, without quoting
// or escaping: callers must only pass a trusted identifier.
let mut query = format!("INSERT INTO {}", table);
query.push_str(
r#"
(code_fantoir,
departement, code_commune, code_insee, type_commune, is_pseudo_recensee,
identifiant_communal_voie, cle_rivoli, code_nature_voie, libelle_voie, type_voie, is_public,
is_large, population_a_part, population_fictive,
is_cancelled, cancel_date, creation_date, code_majic, last_alpha_word
)
VALUES
($1,
$2, $3, $4, $5, $6,
$7, $8, $9, $10, $11, $12,
$13, $14, $15,
$16, $17, $18, $19, $20
)"#
);
sqlx::query(&query)
/* Identifiers */
.bind(&self.code_fantoir)
/* Part 1 - commune */
.bind(&self.departement)
.bind(&self.code_commune)
.bind(&self.code_insee)
.bind(&self.type_commune)
.bind(&self.is_pseudo_recensee)
/* Part 2 - Voie */
.bind(&self.identifiant_communal_voie)
.bind(&self.cle_rivoli)
.bind(&self.code_nature_voie)
.bind(&self.libelle_voie)
.bind(&self.type_voie)
.bind(&self.is_public)
/* Part 3 - Population */
.bind(&self.is_large)
.bind(&self.population_a_part)
.bind(&self.population_fictive)
/* Part 4 - Metadata */
.bind(&self.is_cancelled)
.bind(&self.cancel_date)
.bind(&self.creation_date)
.bind(&self.code_majic)
.bind(&self.last_alpha_word)
.execute(pool)
.await
.expect("Can't insert entry to database");
}
}
/// Parses a FANTOIR date, expressed as a 4-digit year followed by a 3-digit
/// day of the year.
///
/// Returns `None` for the placeholder `"0000000"`, or when the year/ordinal
/// pair is rejected by `NaiveDate::from_yo_opt`.
///
/// # Panics
///
/// Panics when `date` is shorter than 7 bytes or when one of the two parts
/// can't be parsed as a number.
pub fn parse_fantoir_date(date: &str) -> Option<NaiveDate> {
    match date {
        "0000000" => None,
        _ => {
            let year = date[0..4].parse().expect("Can't parse date: year part");
            let day_of_year = date[4..7].parse().expect("Can't parse date: ordinal part");

            NaiveDate::from_yo_opt(year, day_of_year)
        }
    }
}
/// Converts a fixed-width field into an optional owned string.
///
/// Leading and trailing whitespace is removed first; a field that is empty
/// once trimmed gives `None`, anything else gives `Some` of the trimmed text.
fn parse_optional_string(expression: &str) -> Option<String> {
    let trimmed = expression.trim();

    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Outcome of normalizing a FANTOIR code with `fix_fantoir_code`.
///
/// Either the canonical code could be rebuilt from the input alone, or the
/// pieces needed for a database lookup are returned instead.
#[derive(Debug, Eq, PartialEq)]
pub enum FixedFantoirCode {
/// The code has been fully computed; the string is the resulting code.
Computed(String),
/// Information needed to query the code has been extracted, but the code direction is unknown.
/// Such a result can be resolved through search_code_fantoir().
// NOTE(review): the query command calls `search_fantoir_code(pool, code_insee, code_voie)`;
// presumably that is the function meant above - confirm the name.
ToSearch { code_insee: String, identifiant_communal_voie: String },
}
/// Transforms FANTOIR code from BAN into regular FANTOIR codes.
/// BAN sometimes uses <insee code>_<identifiant voie commune> without Rivoli key.
///
/// The canonical layout is: department (2 characters), code direction (1),
/// commune (3), identifiant communal de la voie (4), Rivoli key (1).
///
/// Handled inputs:
///
///   - `<insee>_<voie>`: the code direction is inserted, then the Rivoli key
///     is computed;
///   - 10 characters ending with a digit: only the Rivoli key is missing,
///     it is computed and appended;
///   - 10 characters ending with an uppercase letter: the code direction is
///     missing, `0` is inserted after the department;
///   - anything else is returned unchanged as `Computed`.
///
/// For the departments listed in `DEPARTMENTS_WITH_CODE_DIRECTION`, the code
/// direction can't be guessed: `FixedFantoirCode::ToSearch` is returned with
/// the INSEE code and the identifiant of the voie (for `97`, only when the
/// input has no underscore, as the underscore form can be computed).
///
/// # Panics
///
/// Panics when a code containing `_` is too short to be sliced, when a
/// 10-character code ends with something else than an ASCII digit or an ASCII
/// uppercase letter, or when `compute_rivoli_key` panics.
pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode {
    let mut code = code.to_string();

    if code.contains('_') {
        // 97231_B026 -> 972231B026
        code = if code.starts_with("97") {
            // Code direction = department last digit, so the third character
            // is deliberately used twice: 972 + 231 + B026.
            format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..])
        } else if uses_specific_code_direction(&code) {
            // We can't fix it by computation, we need to search it in the database
            return FixedFantoirCode::ToSearch {
                code_insee: code[0..5].to_string(),
                identifiant_communal_voie: code[6..10].to_string(),
            };
        } else {
            // Code direction = 0
            // 44109_0516 -> 4401090516: the zero goes right after the
            // 2-character department and the 3-character commune is kept whole.
            format!("{}0{}{}", &code[0..2], &code[2..5], &code[6..])
        };
    }

    if code.len() == 10 {
        let last_char = code.chars().last().unwrap();

        match last_char {
            '0'..='9' => {
                // Only the Rivoli key is missing.
                code.push(compute_rivoli_key(&code));
            }

            'A'..='Z' => {
                // 441090516U -> 4401090516U
                code = if uses_specific_code_direction(&code) {
                    // We can't fix it by computation, we need to search it in the database
                    // 920514135A -> 92051 4135
                    return FixedFantoirCode::ToSearch {
                        code_insee: code[0..5].to_string(),
                        identifiant_communal_voie: code[5..9].to_string(),
                    };
                } else {
                    format!("{}0{}", &code[0..2], &code[2..])
                };
            }

            _ => unreachable!(),
        }
    }

    FixedFantoirCode::Computed(code)
}
/// Tells whether the code belongs to one of the departments listed in
/// `DEPARTMENTS_WITH_CODE_DIRECTION`, i.e. starts with one of those prefixes.
pub fn uses_specific_code_direction(code: &str) -> bool {
    for department in DEPARTMENTS_WITH_CODE_DIRECTION.iter() {
        if code.starts_with(*department) {
            return true;
        }
    }

    false
}
/// Computes the Rivoli key (control letter) of a 10-character FANTOIR code.
///
/// The code is split into the 6 first characters (read as a number), the 7th
/// character (a digit, or a letter where A counts for 10, B for 11, ...) and
/// the remaining characters (read as a number). The key is the letter of
/// `RIVOLI_STRING` found at index `(first * 19 + seventh * 11 + rest) % 23`.
///
/// # Panics
///
/// Panics for codes starting with `2A` or `2B` (not implemented), and when
/// the code is too short or its numeric parts can't be parsed.
pub fn compute_rivoli_key(code: &str) -> char {
    // See https://georezo.net/forum/viewtopic.php?id=102292

    let is_corse = ["2A", "2B"].iter().any(|prefix| code.starts_with(*prefix));
    if is_corse {
        // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19?
        unimplemented!()
    }

    let commune_number: i32 = code[0..6].parse().unwrap();

    let voie_letter = code.chars().nth(6).unwrap();
    let voie_letter_value = match voie_letter {
        // 'A' is 65, so 'A' -> 10, 'B' -> 11, ...
        letter if letter.is_alphabetic() => letter as u32 - 55,
        digit => digit.to_digit(10).unwrap(),
    };

    let voie_number: i32 = code[7..].parse().unwrap();

    let position = (commune_number * 19 + voie_letter_value as i32 * 11 + voie_number) % 23;

    RIVOLI_STRING[position as usize]
}
+/// Determines if the specified expression looks like a FANTOIR code,
+/// as used by DGFiP official FANTOIR file.
+///
+/// The IGN or OpenStreetMap format variants will return false.
+///
+/// This method does NOT check the RIVOLI key, only the format.
+pub fn looks_like_canonical_fantoir_code (expression: &str) -> bool {
+ RE_FANTOIR.is_match(expression)
+}
+
#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
use super::*;
#[test]
fn test_parse_fantoir_date() {
let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
let actual = parse_fantoir_date("1987001").unwrap();
assert_eq!(expected, actual);
}
#[test]
fn test_parse_optional_string() {
assert_eq!(Some(String::from("quux")), parse_optional_string("quux"));
}
#[test]
fn test_parse_optional_string_with_trailing_spaces() {
assert_eq!(Some(String::from("quux")), parse_optional_string("quux "));
}
#[test]
fn test_parse_optional_string_when_empty() {
assert_eq!(true, parse_optional_string("").is_none());
}
#[test]
fn test_parse_optional_string_when_only_spaces() {
assert_eq!(true, parse_optional_string(" ").is_none());
}
#[test]
pub fn test_fix_fantoir_code () {
assert_fixed_fantoir_code("755112P144L", fix_fantoir_code("755112P144L"));
assert_fixed_fantoir_code("972231B026U", fix_fantoir_code("97231_B026"));
assert_fixed_fantoir_code("4401090516U", fix_fantoir_code("441090516U"));
assert_fixed_fantoir_code("972222B305L", fix_fantoir_code("972222B305"));
}
fn assert_fixed_fantoir_code (expected: &str, actual: FixedFantoirCode) {
match actual {
FixedFantoirCode::Computed(code) => {
assert_eq!(expected, &code);
},
_ => assert!(false, "Expected a computed FANTOIR code")
}
}
#[test]
pub fn test_fix_fantoir_code_when_it_cannot_be_computed () {
let expected = FixedFantoirCode::ToSearch {
code_insee: "92002".to_string(),
identifiant_communal_voie: "5130".to_string()
};
assert_eq!(expected, fix_fantoir_code("920025130X"), "As code direction can't be computed, this code should be to search");
assert_eq!(expected, fix_fantoir_code("92002_5130"), "As code direction can't be computed, this code should be to search");
}
-
#[test]
pub fn test_compute_rivoli_key() {
assert_eq!('W', compute_rivoli_key("380003B001"));
assert_eq!('U', compute_rivoli_key("972231B026"));
}
#[test]
pub fn test_compute_rivoli_key_with_type_voie_zero() {
assert_eq!('C', compute_rivoli_key("9722230261"));
}
+
+ #[test]
+ pub fn test_looks_like_canonical_fantoir_code () {
+ assert!(looks_like_canonical_fantoir_code("770246B015C"));
+ }
+
+ #[test]
+ pub fn test_looks_like_canonical_fantoir_code_for_variants () {
+ assert!(!looks_like_canonical_fantoir_code("770246B015"));
+ assert!(!looks_like_canonical_fantoir_code("77246_B015"));
+ assert!(!looks_like_canonical_fantoir_code("77246B015C"));
+ }
}
diff --git a/fantoir-datasource/src/main.rs b/fantoir-datasource/src/main.rs
index 364c97e..1f1cdcd 100644
--- a/fantoir-datasource/src/main.rs
+++ b/fantoir-datasource/src/main.rs
@@ -1,120 +1,120 @@
use std::env;
use clap::{Args, Parser};
use crate::commands::promote::promote;
mod commands;
mod db;
mod fantoir;
mod services;
// Subcommands of the command line tool. Each variant wraps the argument
// structure handed over to the matching handler by `main`.
// NOTE(review): with clap's derive API, the `///` comments below are presumably
// surfaced as help text for each subcommand - keep them written for end users.
#[derive(Debug, Parser)]
#[command(name = "fantoir-datasource")]
#[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)]
enum FantoirCommand {
/// Fetch the last version of the FANTOIR file
Fetch(FetchArgs),
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
/// Promote an imported FANTOIR table as the current FANTOIR table to use
#[command(arg_required_else_help = true)]
Promote(PromoteArgs),
/// Query Wikidata SPARQL end-point to enrich FANTOIR information
Wikidata(WikidataArgs),
/// Query the imported FANTOIR table
Query(QueryArgs)
}
// Arguments of the `Fetch` subcommand; `main` forwards `overwrite` to
// `commands::fetch::fetch`.
#[derive(Debug, Args)]
pub struct FetchArgs {
/// Overwrite file if already existing
#[arg(long)]
overwrite: bool,
}
// Arguments of the `Import` subcommand; `main` passes the whole structure by
// reference to `commands::import::import`.
#[derive(Debug, Args)]
pub struct ImportArgs {
/// Create table if it doesn't exist
#[arg(short = 'c')]
create_table: bool,
/// Truncate table if it already exists, allowing the overwrite mode.
/// If not specified, the script will fail if table exists.
#[arg(short = 't')]
overwrite_table: bool,
// The two fields below carry no #[arg] attribute.
// NOTE(review): presumably clap treats them as positional arguments, in this order - confirm.
/// The FANTOIR file to import
fantoir_file: String,
/// The name of the table to populate
fantoir_table: String,
}
// Arguments of the `Promote` subcommand; `main` forwards `fantoir_table` to
// `promote`.
#[derive(Debug, Args)]
pub struct PromoteArgs {
/// The name of the table to promote
fantoir_table: String,
}
// Arguments of the `Wikidata` subcommand; `main` passes the whole structure by
// reference to `commands::wikidata::import`.
#[derive(Debug, Args)]
pub struct WikidataArgs {
    /// Create table if it doesn't exist
    #[arg(short = 'c')]
    create_table: bool,

    /// Truncate table if it already exists, allowing the overwrite mode.
    /// If not specified, the script will fail if table exists.
    #[arg(short = 't')]
    overwrite_table: bool,

    /// Generate a Wikidata maintenance report instead of printing errors to stderr
    #[arg(long)]
    maintenance_report: bool,
}
#[derive(Debug, Args)]
#[clap(trailing_var_arg=true)]
pub struct QueryArgs {
/// INSEE code to identify a commune
#[arg(long)]
code_insee: Option<String>,
/// Identifier of the voie by the commune
#[arg(long)]
code_voie: Option<String>,
- /// Expression to search
- libelle: Vec<String>,
+ /// Expression to search, FANTOIR code or label of the voie
+ expression: Vec<String>,
}
/// Entry point: parses the command line, reads the database URL from the
/// environment, then runs the handler of the selected subcommand.
///
/// # Panics
///
/// Panics when the `DATABASE_URL` environment variable can't be read.
#[tokio::main]
async fn main() {
    // Parsing comes first: it exits on its own when an argument is missing
    // or when --help/--version is provided.
    let subcommand = FantoirCommand::parse();

    let db_url = env::var("DATABASE_URL")
        .expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");

    match subcommand {
        FantoirCommand::Fetch(args) => {
            commands::fetch::fetch(args.overwrite).await;
        }

        FantoirCommand::Import(args) => {
            commands::import::import(&args, &db_url).await;
        }

        FantoirCommand::Promote(args) => {
            promote(&args.fantoir_table, &db_url).await;
        }

        FantoirCommand::Wikidata(args) => {
            commands::wikidata::import(&args, &db_url).await;
        }

        FantoirCommand::Query(args) => {
            commands::query::search(args, &db_url).await;
        }
    }
}

File Metadata

Mime Type
text/x-diff
Expires
Sun, Nov 24, 19:19 (5 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2256214
Default Alt Text
(19 KB)

Event Timeline