Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3766617
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
index 64da1e4..c8b9d2d 100644
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -1,36 +1,37 @@
[package]
name = "fantoir-datasource"
version = "0.1.0"
edition = "2021"
description = "Generates a Postgres table from FANTOIR raw file"
authors = [
"Sébastien Santoro <dereckson@espace-win.org>"
]
license = "BSD-2-Clause"
[dependencies]
chrono = "~0.4.23"
lazy_static = "~1.4.0"
opendatasoft-explore-api = { version = "0.1.0", path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.1"
+regex = "~1.7.1"
sparesults = "~0.1.3"
[dependencies.async-scoped]
version = "~0.7.1"
features = ["use-tokio"]
[dependencies.clap]
version = "~4.0.32"
features = ["derive"]
[dependencies.reqwest]
version = "~0.11.13"
features = ["gzip", "deflate"]
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
[dependencies.tokio]
version = "~1.23.0"
features = ["full"]
diff --git a/fantoir-datasource/src/commands/query.rs b/fantoir-datasource/src/commands/query.rs
index 05e141e..2f47b7d 100644
--- a/fantoir-datasource/src/commands/query.rs
+++ b/fantoir-datasource/src/commands/query.rs
@@ -1,76 +1,90 @@
use std::process::exit;
use sqlx::PgPool;
use crate::db::connect_to_db;
+use crate::fantoir::looks_like_canonical_fantoir_code;
use crate::QueryArgs;
use crate::services::query::*;
static EXIT_CODE_NO_RESULT_FOUND: i32 = 4;
/// Runs the `query` subcommand against the database reachable at `database_url`.
///
/// Resolution order, as implemented below:
/// 1. when both `code_insee` and `code_voie` are set, the FANTOIR code is looked up with
///    `search_fantoir_code()`; the matching row is printed, or the process exits with
///    `EXIT_CODE_NO_RESULT_FOUND` when no code is found;
/// 2. otherwise, when at least one expression word is given, a single word shaped like a
///    canonical FANTOIR code is looked up directly, and anything else is searched as a label;
/// 3. with no usable argument at all, the function reaches `unimplemented!()` and panics.
pub async fn search(args: QueryArgs, database_url: &str) {
let pool = connect_to_db(database_url).await;
// Both options are required together: one alone falls through to the expression search.
if args.code_insee.is_some() && args.code_voie.is_some() {
let code_fantoir = search_fantoir_code(
&pool,
&args.code_insee.unwrap(),
&args.code_voie.unwrap(),
).await;
if let Some(code) = code_fantoir {
search_one_row(&pool, &code).await;
return;
}
exit(EXIT_CODE_NO_RESULT_FOUND);
}
- if args.libelle.len() > 0 {
+ if args.expression.len() > 0 {
+ if let Some(code) = pick_fantoir_code_from_args(&args.expression) {
+ search_one_row(&pool, &code).await;
+ return;
+ }
+
search_libelle(&pool, args).await;
return;
}
// No search criteria were provided: this case is not handled yet.
unimplemented!()
}
/// Prints the row matching `code_fantoir`.
///
/// When `query_fantoir_code()` returns nothing, the process is terminated
/// with the `EXIT_CODE_NO_RESULT_FOUND` exit code instead of returning.
async fn search_one_row(pool: &PgPool, code_fantoir: &str) {
    if let Some(row) = query_fantoir_code(pool, code_fantoir).await {
        println!("{}", row);
    } else {
        exit(EXIT_CODE_NO_RESULT_FOUND);
    }
}
/// Searches voies by label and prints every result accepted by `entry_matches_conditions()`.
///
/// The expression words are joined with a single space before being passed to
/// `query_libelle()`. When nothing has been printed, the process exits with
/// `EXIT_CODE_NO_RESULT_FOUND`.
async fn search_libelle(pool: &PgPool, args: QueryArgs) {
- let expression = args.libelle.join(" ");
+ let expression = args.expression.join(" ");
// Set by the closure below as soon as one entry passes the filter.
let mut found = false;
query_libelle(pool, &expression)
.await
.iter()
.filter(|&entry| entry_matches_conditions(entry, &args))
.for_each(|entry| {
found = true;
println!("{}", entry);
});
if !found {
exit(EXIT_CODE_NO_RESULT_FOUND);
}
}
/// Tells whether a search result satisfies the extra conditions given on the command line.
///
/// The only condition checked is `code_insee`: when it is set, the entry must carry
/// the same INSEE code. Without it, every entry is accepted.
fn entry_matches_conditions(entry: &FantoirVoieResult, conditions: &QueryArgs) -> bool {
    match &conditions.code_insee {
        Some(code_insee) if &entry.code_insee != code_insee => false,
        _ => true,
    }
}
+
+/// Extracts a FANTOIR code from the positional arguments, if they are exactly one code.
+///
+/// Returns a copy of the argument when `expressions` holds a single element accepted by
+/// `looks_like_canonical_fantoir_code()`, and `None` in every other case (no argument,
+/// several words, or a single word which is not shaped like a canonical code).
+///
+/// Takes a slice rather than `&Vec<String>`: callers passing `&args.expression` still
+/// work through deref coercion, and any `&[String]` is accepted too.
+fn pick_fantoir_code_from_args(expressions: &[String]) -> Option<String> {
+    match expressions {
+        [single] if looks_like_canonical_fantoir_code(single) => Some(single.clone()),
+        _ => None,
+    }
+}
diff --git a/fantoir-datasource/src/fantoir.rs b/fantoir-datasource/src/fantoir.rs
index daed379..224a0e3 100644
--- a/fantoir-datasource/src/fantoir.rs
+++ b/fantoir-datasource/src/fantoir.rs
@@ -1,332 +1,358 @@
//! # Helper methods for FANTOIR database.
//!
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
use chrono::NaiveDate;
use lazy_static::lazy_static;
+use regex::Regex;
use sqlx::PgPool;
lazy_static! {
/// Code prefixes for which `uses_specific_code_direction()` returns true.
/// For those, `fix_fantoir_code()` does not assume a code direction of 0.
static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
/// The alphabet without I O and Q.
/// `compute_rivoli_key()` indexes it with a value modulo 23, matching its 23 letters.
static ref RIVOLI_STRING: Vec<char> = vec![
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
];
+
/// Format of an 11-character code: a digit, a digit or A-Z, four digits,
/// a digit or A-Z, three digits, then a final A-Z letter.
+ static ref RE_FANTOIR: Regex = Regex::new(
+ "^[0-9][0-9A-Z][0-9][0-9][0-9][0-9][0-9A-Z][0-9][0-9][0-9][A-Z]$"
+ ).unwrap();
}
/// A voie in the FANTOIR database
///
/// Every field is filled by `FantoirEntry::parse_line` from a fixed-width slice of one
/// record line, and written to the database by `FantoirEntry::insert_to_db`.
#[derive(Debug)]
pub struct FantoirEntry {
/* Identifiers */
// The first 11 characters of the record line.
code_fantoir: String,
/* Part 1 - commune */
// Two characters, or three when the line starts with "97".
departement: String, // Generally an integer, but INSEE uses 2A and 2B for Corse
code_commune: i32,
// Built from characters 0..2 and 3..6 of the line, skipping the character in between.
code_insee: String, // Afa in Corse has 2A001
// None when the source column only contains spaces.
type_commune: Option<String>,
// True when the source column is "3".
is_pseudo_recensee: bool,
/* Part 2 - voie */
identifiant_communal_voie: String,
// Single character following the identifiant communal in the FANTOIR code.
cle_rivoli: String,
// None when the source column only contains spaces.
code_nature_voie: Option<String>,
// Stored trimmed.
libelle_voie: String,
type_voie: i32, // 1: voie, 2: ens. immo, 3: lieu-dit, 4: pseudo-voie, 5: provisoire
// True when the source column is "0".
is_public: bool,
/* Part 3 - population */
// True when the source column is "*".
is_large: bool,
population_a_part: i32,
population_fictive: i32,
/* Part 4 - metadata */
// True when the source column is anything but a space.
is_cancelled: bool,
// Both dates are None when the source column is "0000000".
cancel_date: Option<NaiveDate>,
creation_date: Option<NaiveDate>,
code_majic: i32,
// Everything from character 112 to the end of the line, untrimmed.
last_alpha_word: String,
}
impl FantoirEntry {
/// Builds an entry from one fixed-width record line of the FANTOIR file.
///
/// Fields are read by byte offsets, so the line is expected to be at least 112 bytes long
/// with every offset on a character boundary.
///
/// # Panics
///
/// Panics when the line is too short for one of the slices below, or when a numeric
/// column (code commune, type de voie, populations, MAJIC, date parts) can't be parsed.
pub fn parse_line(line: &str) -> Self {
let departement = match &line[0..2] {
"97" => String::from(&line[0..3]), // include for DOM/TOM the next digit
department => String::from(department),
};
let len = line.len();
Self {
/* Identifier */
code_fantoir: String::from(&line[0..11]),
/* Part 1 - commune */
departement,
code_commune: line[3..6].parse().expect("Can't parse code commune"),
// The character at offset 2 is skipped to build the INSEE code.
code_insee: format!("{:02}{:03}", &line[0..2], &line[3..6]),
type_commune: parse_optional_string(&line[43..44]),
is_pseudo_recensee: &line[45..46] == "3",
/* Part 2 - voie */
identifiant_communal_voie: String::from(&line[6..10]),
cle_rivoli: String::from(&line[10..11]),
code_nature_voie: parse_optional_string(&line[11..15]),
libelle_voie: String::from(line[15..41].trim()),
type_voie: line[108..109].parse().expect("Can't parse type de voie."),
is_public: &line[48..49] == "0",
/* Part 3 - population */
is_large: &line[49..50] == "*",
population_a_part: line[59..66].parse().expect("Can't parse population à part"),
population_fictive: line[66..73].parse().expect("Can't parse population fictive"),
/* Part 4 - metadata */
is_cancelled: &line[73..74] != " ",
cancel_date: parse_fantoir_date(&line[74..81]),
creation_date: parse_fantoir_date(&line[81..88]),
code_majic: line[103..108].parse().expect("Can't parse MAJIC"),
last_alpha_word: String::from(&line[112..len]),
}
}
/// Inserts this entry as one row of `table`.
///
/// The 20 values are sent as bind parameters, in the same order as the column list.
/// The table name is interpolated into the SQL text with `format!`, as it is not a
/// bind parameter: callers should only pass a trusted table name.
///
/// # Panics
///
/// Panics when the query fails.
pub async fn insert_to_db(&self, pool: &PgPool, table: &str) {
let mut query = format!("INSERT INTO {}", table);
query.push_str(
r#"
(code_fantoir,
departement, code_commune, code_insee, type_commune, is_pseudo_recensee,
identifiant_communal_voie, cle_rivoli, code_nature_voie, libelle_voie, type_voie, is_public,
is_large, population_a_part, population_fictive,
is_cancelled, cancel_date, creation_date, code_majic, last_alpha_word
)
VALUES
($1,
$2, $3, $4, $5, $6,
$7, $8, $9, $10, $11, $12,
$13, $14, $15,
$16, $17, $18, $19, $20
)"#
);
// Bind order matters: the n-th bind() call fills the $n placeholder.
sqlx::query(&query)
/* Identifiers */
.bind(&self.code_fantoir)
/* Part 1 - commune */
.bind(&self.departement)
.bind(&self.code_commune)
.bind(&self.code_insee)
.bind(&self.type_commune)
.bind(&self.is_pseudo_recensee)
/* Part 2 - Voie */
.bind(&self.identifiant_communal_voie)
.bind(&self.cle_rivoli)
.bind(&self.code_nature_voie)
.bind(&self.libelle_voie)
.bind(&self.type_voie)
.bind(&self.is_public)
/* Part 3 - Population */
.bind(&self.is_large)
.bind(&self.population_a_part)
.bind(&self.population_fictive)
/* Part 4 - Metadata */
.bind(&self.is_cancelled)
.bind(&self.cancel_date)
.bind(&self.creation_date)
.bind(&self.code_majic)
.bind(&self.last_alpha_word)
.execute(pool)
.await
.expect("Can't insert entry to database");
}
}
/// Parses a FANTOIR date, written as a four-digit year followed by a three-digit
/// day of the year (e.g. `1987001` for the first day of 1987).
///
/// Returns `None` for the `0000000` placeholder, and for a year/ordinal pair
/// rejected by `NaiveDate::from_yo_opt`.
///
/// # Panics
///
/// Panics when the expression is shorter than 7 bytes or when one of its two parts
/// is not a number.
pub fn parse_fantoir_date(date: &str) -> Option<NaiveDate> {
    match date {
        "0000000" => None,
        _ => {
            let year = date[0..4].parse().expect("Can't parse date: year part");
            let ord = date[4..7].parse().expect("Can't parse date: ordinal part");

            NaiveDate::from_yo_opt(year, ord)
        }
    }
}
/// Trims the expression and returns it as an owned string,
/// or `None` when nothing is left after trimming.
fn parse_optional_string(expression: &str) -> Option<String> {
    match expression.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}
/// A fixed FANTOIR code result
#[derive(Debug, Eq, PartialEq)]
pub enum FixedFantoirCode {
/// The code has been fully computed
Computed(String),
/// Information needed to query the code has been extracted, but code direction is unknown.
/// Such a result needs a database lookup on both fields to get the full code
/// (presumably `search_fantoir_code()` from `services::query` — confirm the helper name).
ToSearch { code_insee: String, identifiant_communal_voie: String },
}
/// Transforms FANTOIR code from BAN into regular FANTOIR codes.
///
/// BAN sometimes uses `<insee code>_<identifiant voie commune>` without code direction
/// nor Rivoli key. Ten-character codes are handled too: a code ending with a digit only
/// lacks its Rivoli key, and a code ending with a letter only lacks its code direction.
/// Any other expression is returned unchanged as a computed code.
///
/// Returns `FixedFantoirCode::Computed` when the code direction is known (department
/// digit for codes starting with 97, 0 for regular departments), and
/// `FixedFantoirCode::ToSearch` when the department uses specific code directions,
/// as the direction must then be searched in the database.
///
/// # Panics
///
/// Panics when an expression containing `_` is too short to be sliced, when a
/// 10-character code ends with something else than a digit or an uppercase letter,
/// or when `compute_rivoli_key()` can't handle the code.
pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode {
    let mut code = code.to_string();

    if code.contains('_') {
        code = if code.starts_with("97") {
            // Code direction = department last digit
            // 97231_B026 -> 972231B026
            format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..])
        } else if uses_specific_code_direction(&code) {
            // We can't fix it by computation, we need to search it in the database
            return FixedFantoirCode::ToSearch {
                code_insee: code[0..5].to_string(),
                identifiant_communal_voie: code[6..10].to_string(),
            };
        } else {
            // Code direction = 0, inserted between the department and the commune:
            // 77246_B015 -> 770246B015
            format!("{}0{}{}", &code[0..2], &code[2..5], &code[6..])
        };
    }

    if code.len() == 10 {
        let last_char = code.chars().last().unwrap();

        match last_char {
            '0'..='9' => {
                // Only the Rivoli key is missing.
                let key = compute_rivoli_key(&code);
                code.push(key);
            }

            'A'..='Z' => {
                if uses_specific_code_direction(&code) {
                    // We can't fix it by computation, we need to search it in the database
                    // 920514135A -> 92051 4135
                    return FixedFantoirCode::ToSearch {
                        code_insee: code[0..5].to_string(),
                        identifiant_communal_voie: code[5..9].to_string(),
                    };
                }

                // Only the code direction is missing, and it's 0.
                // 441090516U -> 4401090516U
                code = format!("{}0{}", &code[0..2], &code[2..]);
            }

            _ => unreachable!(),
        }
    }

    FixedFantoirCode::Computed(code)
}
/// Tells whether the code starts with one of the prefixes listed in
/// `DEPARTMENTS_WITH_CODE_DIRECTION`.
pub fn uses_specific_code_direction(code: &str) -> bool {
    for dpt in DEPARTMENTS_WITH_CODE_DIRECTION.iter() {
        if code.starts_with(*dpt) {
            return true;
        }
    }

    false
}
/// Computes the Rivoli key, i.e. the final letter of a FANTOIR code, from its
/// first ten characters.
///
/// The key is the letter of `RIVOLI_STRING` found at the index
/// `(commune part * 19 + type de voie * 11 + voie number) % 23`, where:
///   - the commune part is the number formed by the first six characters,
///   - the type de voie is the seventh character, a letter counting from A = 10,
///   - the voie number is the number formed by the remaining characters.
///
/// # Panics
///
/// Panics for Corsica codes (2A, 2B), which aren't implemented, and when
/// the code is too short or its numeric parts can't be parsed.
pub fn compute_rivoli_key(code: &str) -> char {
    // Algorithm described at https://georezo.net/forum/viewtopic.php?id=102292
    let is_corsica = ["2A", "2B"].iter().any(|&prefix| code.starts_with(prefix));
    if is_corsica {
        // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19?
        unimplemented!()
    }

    let commune_part: i32 = code[0..6].parse().unwrap();

    let voie_type = code.chars().nth(6).unwrap();
    let voie_type_value = if voie_type.is_alphabetic() {
        // 'A' is 65, so A = 10, B = 11, and so on.
        voie_type as u32 - 55
    } else {
        voie_type.to_digit(10).unwrap()
    };

    let voie_number: i32 = code[7..].parse().unwrap();

    let weighted_sum = commune_part * 19 + voie_type_value as i32 * 11 + voie_number;

    RIVOLI_STRING[(weighted_sum % 23) as usize]
}
+/// Determines if the specified expression looks like a FANTOIR code, as used by DGFiP
+/// official FANTOIR file: exactly 11 characters matched by `RE_FANTOIR`, i.e. a digit,
+/// a digit or A-Z, four digits, a digit or A-Z, three digits, then an uppercase letter.
+/// The IGN or OpenStreetMap format variants will return false.
+///
+/// This method does NOT check the RIVOLI key, only the format.
+pub fn looks_like_canonical_fantoir_code (expression: &str) -> bool {
+ RE_FANTOIR.is_match(expression)
+}
+
#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
use super::*;
// FANTOIR dates are a year followed by a day of the year: 1987001 is January 1st, 1987.
#[test]
fn test_parse_fantoir_date() {
let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
let actual = parse_fantoir_date("1987001").unwrap();
assert_eq!(expected, actual);
}
#[test]
fn test_parse_optional_string() {
assert_eq!(Some(String::from("quux")), parse_optional_string("quux"));
}
#[test]
fn test_parse_optional_string_with_trailing_spaces() {
assert_eq!(Some(String::from("quux")), parse_optional_string("quux "));
}
#[test]
fn test_parse_optional_string_when_empty() {
assert_eq!(true, parse_optional_string("").is_none());
}
#[test]
fn test_parse_optional_string_when_only_spaces() {
assert_eq!(true, parse_optional_string(" ").is_none());
}
// Covers a code already complete, an underscore variant starting with 97,
// a code lacking its code direction and a code lacking its Rivoli key.
#[test]
pub fn test_fix_fantoir_code () {
assert_fixed_fantoir_code("755112P144L", fix_fantoir_code("755112P144L"));
assert_fixed_fantoir_code("972231B026U", fix_fantoir_code("97231_B026"));
assert_fixed_fantoir_code("4401090516U", fix_fantoir_code("441090516U"));
assert_fixed_fantoir_code("972222B305L", fix_fantoir_code("972222B305"));
}
// Helper: asserts the result is a computed code equal to the expected one.
fn assert_fixed_fantoir_code (expected: &str, actual: FixedFantoirCode) {
match actual {
FixedFantoirCode::Computed(code) => {
assert_eq!(expected, &code);
},
_ => assert!(false, "Expected a computed FANTOIR code")
}
}
// Department 92 uses specific code directions, so both variants must be searched.
#[test]
pub fn test_fix_fantoir_code_when_it_cannot_be_computed () {
let expected = FixedFantoirCode::ToSearch {
code_insee: "92002".to_string(),
identifiant_communal_voie: "5130".to_string()
};
assert_eq!(expected, fix_fantoir_code("920025130X"), "As code direction can't be computed, this code should be to search");
assert_eq!(expected, fix_fantoir_code("92002_5130"), "As code direction can't be computed, this code should be to search");
}
-
#[test]
pub fn test_compute_rivoli_key() {
assert_eq!('W', compute_rivoli_key("380003B001"));
assert_eq!('U', compute_rivoli_key("972231B026"));
}
// Here the seventh character is the digit 0, not a letter.
#[test]
pub fn test_compute_rivoli_key_with_type_voie_zero() {
assert_eq!('C', compute_rivoli_key("9722230261"));
}
+
+ #[test]
+ pub fn test_looks_like_canonical_fantoir_code () {
+ assert!(looks_like_canonical_fantoir_code("770246B015C"));
+ }
+
// Variants rejected: no Rivoli key, underscore notation, no code direction.
+ #[test]
+ pub fn test_looks_like_canonical_fantoir_code_for_variants () {
+ assert!(!looks_like_canonical_fantoir_code("770246B015"));
+ assert!(!looks_like_canonical_fantoir_code("77246_B015"));
+ assert!(!looks_like_canonical_fantoir_code("77246B015C"));
+ }
}
diff --git a/fantoir-datasource/src/main.rs b/fantoir-datasource/src/main.rs
index 364c97e..1f1cdcd 100644
--- a/fantoir-datasource/src/main.rs
+++ b/fantoir-datasource/src/main.rs
@@ -1,120 +1,120 @@
use std::env;
use clap::{Args, Parser};
use crate::commands::promote::promote;
mod commands;
mod db;
mod fantoir;
mod services;
// Subcommands of the command-line tool, dispatched by the match in main().
// NOTE(review): with clap's derive API the `///` comments below are expected to be
// the help text of each subcommand, so treat them as user-facing strings.
#[derive(Debug, Parser)]
#[command(name = "fantoir-datasource")]
#[clap(author="Nasqueron project", version, about="Import FANTOIR database into PostgreSQL", long_about=None)]
enum FantoirCommand {
/// Fetch the last version of the FANTOIR file
Fetch(FetchArgs),
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
/// Promote an imported FANTOIR table as the current FANTOIR table to use
#[command(arg_required_else_help = true)]
Promote(PromoteArgs),
/// Query Wikidata SPARQL end-point to enrich FANTOIR information
Wikidata(WikidataArgs),
/// Query the imported FANTOIR table
Query(QueryArgs)
}
// Arguments of the `fetch` subcommand; main() forwards `overwrite` to commands::fetch::fetch().
#[derive(Debug, Args)]
pub struct FetchArgs {
/// Overwrite file if already existing
#[arg(long)]
overwrite: bool,
}
// Arguments of the `import` subcommand; main() passes them by reference to commands::import::import().
// The two flags are short options (-c, -t), the file and the table are positional arguments.
#[derive(Debug, Args)]
pub struct ImportArgs {
/// Create table if it doesn't exist
#[arg(short = 'c')]
create_table: bool,
/// Truncate table if it already exists, allowing the overwrite mode.
/// If not specified, the script will fail if table exists.
#[arg(short = 't')]
overwrite_table: bool,
/// The FANTOIR file to import
fantoir_file: String,
/// The name of the table to populate
fantoir_table: String,
}
// Arguments of the `promote` subcommand; main() passes `fantoir_table` to promote().
#[derive(Debug, Args)]
pub struct PromoteArgs {
/// The name of the table to promote
fantoir_table: String,
}
// Arguments of the `wikidata` subcommand; main() passes them by reference to commands::wikidata::import().
#[derive(Debug, Args)]
pub struct WikidataArgs {
/// Create table if it doesn't exist
#[arg(short = 'c')]
create_table: bool,
/// Truncate table if it already exists, allowing the overwrite mode.
/// If not specified, the script will fail if table exists.
#[arg(short = 't')]
overwrite_table: bool,
/// Generate a Wikidata maintenance report instead to print errors to stderr
#[arg(long)]
maintenance_report: bool,
}
// Arguments of the `query` subcommand, consumed by commands::query::search().
// There, `code_insee` and `code_voie` given together trigger a direct code lookup,
// while `code_insee` alone only filters the results of a label search.
// The trailing positional words are collected into a vector (see trailing_var_arg).
#[derive(Debug, Args)]
#[clap(trailing_var_arg=true)]
pub struct QueryArgs {
/// INSEE code to identify a commune
#[arg(long)]
code_insee: Option<String>,
/// Identifier of the voie by the commune
#[arg(long)]
code_voie: Option<String>,
- /// Expression to search
- libelle: Vec<String>,
+ /// Expression to search, FANTOIR code or label of the voie
+ expression: Vec<String>,
}
#[tokio::main]
async fn main() {
let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
let database_url = env::var("DATABASE_URL")
.expect("The environment variable DATABASE_URL need to be set to your PostgreSQL database.");
match command {
FantoirCommand::Fetch(args) => {
commands::fetch::fetch(args.overwrite).await;
},
FantoirCommand::Import(args) => {
commands::import::import(&args, &database_url).await;
},
FantoirCommand::Promote(args) => {
promote(&args.fantoir_table, &database_url).await;
},
FantoirCommand::Wikidata(args) => {
commands::wikidata::import(&args, &database_url).await
},
FantoirCommand::Query(args) => {
commands::query::search(args, &database_url).await
},
};
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Nov 24, 19:19 (2 h, 31 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2256214
Default Alt Text
(19 KB)
Attached To
Mode
rDS Nasqueron Datasources
Attached
Detach File
Event Timeline
Log In to Comment