Page MenuHomeDevCentral

D2731.id6937.diff
No OneTemporary

D2731.id6937.diff

diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,9 @@
license = "BSD-2-Clause"
[dependencies]
+lazy_static = "~1.4.0"
+oxrdf = "~0.1.1"
+sparesults = "~0.1.3"
[dependencies.async-scoped]
version = "~0.7.1"
@@ -18,6 +21,10 @@
version = "~4.0.32"
features = ["derive"]
+[dependencies.reqwest]
+version = "~0.11.13"
+features = ["gzip", "deflate"]
+
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/src/commands/import.rs b/src/commands/import.rs
--- a/src/commands/import.rs
+++ b/src/commands/import.rs
@@ -12,12 +12,33 @@
use crate::db::*;
use crate::fantoir::FantoirEntry;
+impl ToTableInitializationArgs for &ImportArgs {
+ fn to_table_initialization_args (&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: self.fantoir_table.clone(),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
+
pub async fn import(args: &ImportArgs, database_url: &str) {
let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
let pool = connect_to_db(database_url).await;
// Create/truncate table as needed and as allowed by options
- if let Err(error) = initialize_table(args, &pool).await {
+ let callback = async {
+ create_table(&pool, &args.fantoir_table).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
eprintln!("{}", &error);
exit(1);
}
@@ -46,39 +67,3 @@
.await
}
}
-
-async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
- if is_table_exists(pool, &args.fantoir_table).await {
- if is_table_empty(&pool, &args.fantoir_table).await {
- return Ok(());
- }
-
- if args.overwrite_table {
- truncate_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- return Err(format!(
- "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
- &args.fantoir_table
- ));
- }
-
- if args.create_table {
- create_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- Err(format!(
- "Table {} doesn't exist. To create it, run the import tool with -c option.",
- &args.fantoir_table
- ))
-}
-
-async fn create_table(pool: &PgPool, table: &str) {
- let queries = include_str!("../schema/fantoir.sql")
- .replace("/*table*/fantoir", table)
- .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
-
- run_multiple_queries(pool, &queries).await;
-}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -3,3 +3,4 @@
pub(crate) mod import;
pub(crate) mod promote;
pub(crate) mod query;
+pub(crate) mod wikidata;
diff --git a/src/commands/promote/mod.rs b/src/commands/promote/mod.rs
--- a/src/commands/promote/mod.rs
+++ b/src/commands/promote/mod.rs
@@ -1,23 +1,28 @@
//! Command to promote a table as the one to use.
use sqlx::PgPool;
-use crate::db::{connect_to_db, run_multiple_queries_groups};
+use crate::commands::wikidata::WIKIDATA_TABLE;
+use crate::db::*;
/// Promotes a FANTOIR table as the relevant version to use
pub async fn promote (fantoir_table: &str, database_url: &str) {
let pool = connect_to_db(database_url).await;
- let queries_groups = get_queries_groups(&pool, fantoir_table);
+ let queries_groups = get_queries_groups(&pool, fantoir_table).await;
run_multiple_queries_groups(&pool, &queries_groups);
}
/// Determines the groups of queries needed for promotion
-fn get_queries_groups (pool: &PgPool, fantoir_table: &str) -> Vec<String> {
+async fn get_queries_groups (pool: &PgPool, fantoir_table: &str) -> Vec<String> {
let mut queries_groups = vec![
include_str!("../../schema/promote/config.sql"),
include_str!("../../schema/promote/fantoir_view.sql"),
];
+ if is_table_exists(pool, WIKIDATA_TABLE).await {
+ queries_groups.push(include_str!("../../schema/promote/wikidata.sql"));
+ }
+
queries_groups
.into_iter()
.map(|queries| queries
diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/mod.rs
@@ -0,0 +1,214 @@
+//! Query Wikidata SPARQL end-point and import result into PostgreSQL
+
+mod qualification;
+
+use std::collections::HashMap;
+use std::process::exit;
+use oxrdf::Term;
+use sqlx::PgPool;
+
+use crate::db::*;
+use crate::{sparql, WikidataArgs};
+use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
+use crate::services::http_client::get_user_agent;
+use crate::services::query::search_fantoir_code;
+use crate::sparql::{is_term_empty, parse_literal, parse_term_uri};
+
+pub static WIKIDATA_TABLE: &str = "fantoir_wikidata";
+
+/* -------------------------------------------------------------
+ Import task
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub async fn import (args: &WikidataArgs, database_url: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ let callback = async {
+ let queries = include_str!("../../schema/wikidata.sql");
+ run_multiple_queries(&pool, &queries).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Query Wikidata
+ let client = sparql::Client::new("https://query.wikidata.org/sparql", get_user_agent());
+
+ let mut what_map = HashMap::new();
+ client.query(include_str!("../../queries/wikidata.sparql"))
+ .await
+ .iter()
+ .filter(|entry| !is_term_empty(&entry["code_fantoir"]))
+ .for_each(|entry| {
+ // Build a map of the different P31 (instance of) values for a specified code.
+
+ let key = WikidataEntryKey::parse(entry);
+ let what = parse_wikidata_entity_uri(&entry["what"]).expect("Can't parse P31 what result");
+
+ what_map
+ .entry(key)
+ .or_insert(Vec::new())
+ .push(what);
+ });
+
+ // Consolidate entries and insert them into the database.
+ // To avoid an async closure, we don't use HOF pattern.
+ for (key, candidates) in what_map {
+ if let Some(entry) = WikidataEntry::consolidate_set(&pool, &key, candidates).await {
+ entry.insert_to_db(&pool).await;
+ continue;
+ }
+
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", &key);
+ eprintln!("Can't resolve FANTOIR code.");
+ }
+
+}
+
+/* -------------------------------------------------------------
+ Arguments parsing
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+impl ToTableInitializationArgs for &WikidataArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: String::from(WIKIDATA_TABLE),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata entry structures
+
+ WikidataEntry represents the data ready to be inserted
+ in our database.
+
+ WikidataEntryKey is a subset of WikidataEntry to identify
+ a set (FANTOIR code, Wikidata item) to be used as HashMap key
+ when a SPARQL query returns several rows for such set.
+
+ For example, here, we ask for P31 values, and if a Wikidata
+ entity offers several P31 values, we'll get one row per value.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Debug, Clone)]
+struct WikidataEntry {
+ code_fantoir: String,
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+ what: String,
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+struct WikidataEntryKey {
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+}
+
+impl WikidataEntryKey {
+ fn parse(entry: &HashMap<String, Term>) -> Self {
+ Self {
+ code_fantoir_wikidata: parse_literal(&entry["code_fantoir"]).expect("Can't parse code"),
+ item: parse_wikidata_entity_uri(&entry["item"]).expect("Can't parse item"),
+ item_label: parse_literal(&entry["itemLabel"]).expect("Can't parse item label"),
+ }
+ }
+}
+
+impl WikidataEntry {
+ async fn consolidate_set(pool: &PgPool, key: &WikidataEntryKey, what_candidates: Vec<String>) -> Option<Self> {
+ let what = determine_p31_winner(&what_candidates);
+
+ let code_fantoir = match fix_fantoir_code(&key.code_fantoir_wikidata) {
+ FixedFantoirCode::Computed(code) => code,
+ FixedFantoirCode::ToSearch { code_insee, identifiant_communal_voie } => {
+ search_fantoir_code(pool, &code_insee, &identifiant_communal_voie).await?
+ }
+ };
+
+ Some(Self {
+ code_fantoir,
+ code_fantoir_wikidata: key.code_fantoir_wikidata.clone(),
+ item: key.item.clone(),
+ item_label: key.item_label.clone(),
+ what,
+ })
+ }
+
+ async fn insert_to_db (&self, pool: &PgPool) {
+ let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
+ query.push_str(
+ r#"
+ (code_fantoir, code_fantoir_wikidata, item, item_label, what)
+ VALUES
+ ($1, $2, $3, $4, $5)"#
+ );
+
+ if let Err(error) = sqlx::query(&query)
+ .bind(&self.code_fantoir)
+ .bind(&self.code_fantoir_wikidata)
+ .bind(&self.item)
+ .bind(&self.item_label)
+ .bind(&self.what)
+
+ .execute(pool)
+ .await {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", self);
+ eprintln!("{}", error);
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Parses a Wikidata entity URI from a RDF term.
+///
+/// For example, to parse a term representing Q1234:
+///
+/// ```
+/// let term = Term::NamedNode(
+/// NamedNode::new("http://www.wikidata.org/entity/Q1234").unwrap()
+/// );
+/// let entity = parse_wikidata_entity_uri(&term).unwrap();
+///
+/// assert_eq!("Q1234", &entity);
+/// ```
+pub fn parse_wikidata_entity_uri (term: &Term) -> Option<String> {
+ parse_term_uri(term)
+ .map(|uri| {
+ let pos = uri.rfind('/').expect("URI doesn't contain any /") + 1;
+
+ uri[pos..].to_string()
+ })
+}
+
+/* -------------------------------------------------------------
+ Tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+ use oxrdf::NamedNode;
+ use super::*;
+
+ #[test]
+ pub fn test_parse_wikidata_entity_uri () {
+ let node = NamedNode::new("http://www.wikidata.org/entity/Q849777").unwrap();
+ let term = Term::NamedNode(node);
+
+ assert_eq!("Q849777", &parse_wikidata_entity_uri(&term).unwrap());
+ }
+}
diff --git a/src/commands/wikidata/qualification.rs b/src/commands/wikidata/qualification.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/qualification.rs
@@ -0,0 +1,96 @@
+//! Helper for items qualification.
+//!
+//! Wikidata uses the P31 "instance of" property to qualify items,
+//! which is helpful to identify voies, especially the pseudo-voies
+//! not furthermore described in FANTOIR.
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+ static ref P31_WINNERS: Vec<&'static str> = vec![
+ // Important values
+
+ "Q928830", // metro station
+ "Q18615527", // tram bridge
+ "Q1793804", // station de RER
+ "Q55488", // gare ferroviaire
+ "Q55485", // gare ferroviaire en cul-de-sac
+
+ "Q510662", // ring road
+ "Q2376564", // échangeur autoroutier
+
+ // Less important values, as probably already qualified by FANTOIR
+
+ "Q3558430", // villa, a name used for Paris private roads
+ "Q15070223", // cité, same thing
+
+ "Q207934", // allée
+ "Q54114", // boulevard
+ "Q99228502", // avenue (a road called avenue, not matching the avenue concept)
+ "Q7543083", // avenue (a true one)
+ "Q283977", // parvis
+ "Q174782", // place
+ "Q164419", // galerie
+
+        "Q12731",      // impasse, should lose against avenue (some Paris avenues are so qualified)
+ "Q13634881", // passage
+ "Q1251403", // ruelle
+ "Q3840711", // quai
+ "Q88372", // esplanade, should win against jardin public
+ "Q787113", // promenade
+ "Q17383262", // cour
+ "Q1068842", // passerelle
+ "Q641406", // terrasse
+ "Q16634966", // escalier
+ "Q628179", // sentier
+ "Q5004679", // chemin
+ "Q3352369", // chemin piétonnier
+
+ "Q1529", // rond-point
+ "Q1525", // carrefour giratoire
+
+ "Q4421", // forêt, used for bois de Boulogne, bois de Vincennes
+ "Q22698", // parc
+ "Q2026833", // square, type jardin public
+ "Q22746", // jardin public
+ "Q3215290", // lac artificiel
+
+        "Q12280",     // pont, should lose against place (large places at Paris are also bridges)
+ "Q158438", // pont en arc
+ "Q537127", // pont routier
+ "Q1440300", // tour d'observation
+
+ "Q16560", // palais
+ "Q2080521", // halle
+ "Q16917", // hôpital
+
+ // Those values are probably too generic, so they're kept in last
+
+ "Q1302778", // voie rapide
+ "Q79007", // street, wins against road but loses against boulevard
+ "Q83620", // voie de communication
+ ];
+}
+
+/// Determine amongst a sets of items which one is the more relevant
+/// to describe a pseudo-voie.
+///
+/// This is useful when a Wikidata entity has several values for P31
+/// to decide which one is the most interesting to keep in our context.
+pub fn determine_p31_winner(candidates: &Vec<String>) -> String {
+ if candidates.len() == 1 {
+ // If there is only one, that's the one to use.
+ return candidates[0].clone();
+ }
+
+ for winner_candidate in P31_WINNERS.iter() {
+ for actual_candidate in candidates {
+ if winner_candidate == actual_candidate {
+ return actual_candidate.clone();
+ }
+ }
+ }
+
+ eprintln!("Can't determine P31 winner amongst {:?}, {} is picked.", candidates, candidates[0]);
+ candidates[0].clone()
+}
diff --git a/src/db.rs b/src/db.rs
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,12 +3,24 @@
//! This module provides helpers to interact with a PostgreSQL database.
//! Functions expect to work with an executor from sqlx crate.
+use std::future::Future;
+
use async_scoped::TokioScope;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
static QUERIES_SEPARATOR: &str = "\n\n\n";
+pub struct TableInitializationArgs {
+ pub table_name: String,
+ pub create_table: bool,
+ pub overwrite_table: bool,
+}
+
+pub trait ToTableInitializationArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs;
+}
+
pub async fn connect_to_db (database_url: &str) -> PgPool {
PgPoolOptions::new()
.max_connections(3)
@@ -62,6 +74,41 @@
.expect("Can't truncate table.");
}
+pub async fn initialize_table<F, T> (
+ pool: &PgPool,
+ callback: F,
+ args: T
+) -> Result<(), String>
+ where F: Future, T: ToTableInitializationArgs
+{
+ let args = args.to_table_initialization_args();
+ if is_table_exists(pool, &args.table_name).await {
+ if is_table_empty(&pool, &args.table_name).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.table_name).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.table_name
+ ));
+ }
+
+ if args.create_table {
+ callback.await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.table_name
+ ))
+}
+
pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
for query in queries.split(QUERIES_SEPARATOR) {
sqlx::query(&query)
diff --git a/src/fantoir.rs b/src/fantoir.rs
--- a/src/fantoir.rs
+++ b/src/fantoir.rs
@@ -3,9 +3,20 @@
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
+use lazy_static::lazy_static;
use sqlx::PgPool;
use sqlx::types::chrono::NaiveDate;
+lazy_static! {
+ static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
+
+ /// The alphabet without I O and Q.
+ static ref RIVOLI_STRING: Vec<char> = vec![
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
+ 'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
+ ];
+}
+
/// A voie in the FANTOIR database
#[derive(Debug)]
pub struct FantoirEntry {
@@ -158,6 +169,95 @@
}
}
+/// A fixed FANTOIR code result
+#[derive(Debug, Eq, PartialEq)]
+pub enum FixedFantoirCode {
+ /// The code has been fully computed
+ Computed(String),
+
+ /// Information needed to query the code has been extracted, but code direction is unknown
+ /// Such result can be queried through search_code_fantoir()
+ ToSearch { code_insee: String, identifiant_communal_voie: String },
+}
+
+/// Transforms FANTOIR code from BAN into regular FANTOIR codes.
+/// BAN sometimes uses <insee code>_<identifiant voie commune> without Rivoli key.
+pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode {
+ let mut code = code.to_string();
+
+ if code.contains("_") {
+ // 97231_B026 -> 972231B026
+ code = if code.starts_with("97") {
+ // Code direction = department last digit
+ format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..])
+ } else if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[6..10].to_string(),
+ }
+ } else {
+ // Code direction = 0
+            format!("{}0{}{}", &code[0..2], &code[2..5], &code[6..])
+ };
+ }
+
+ if code.len() == 10 {
+ let last_char = code.chars().last().unwrap();
+
+ match last_char {
+ '0'..='9' => {
+ code.push(compute_rivoli_key(&code));
+ }
+
+ 'A'..='Z' => {
+ // 441090516U -> 4401090516U
+ code = if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ // 920514135A -> 92051 4135
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[5..9].to_string(),
+ }
+ } else {
+ format!("{}0{}", &code[0..2], &code[2..])
+ };
+ }
+
+ _ => unreachable!(),
+ }
+ }
+
+ FixedFantoirCode::Computed(code)
+}
+
+pub fn uses_specific_code_direction (code: &str) -> bool {
+ DEPARTMENTS_WITH_CODE_DIRECTION
+ .iter()
+ .any(|&dpt| code.starts_with(dpt))
+}
+
+pub fn compute_rivoli_key (code: &str) -> char {
+ // See https://georezo.net/forum/viewtopic.php?id=102292
+
+ if code.starts_with("2A") || code.starts_with("2B") {
+ // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19?
+ unimplemented!()
+ }
+
+ let part_commune: i32 = code[0..6].parse().unwrap();
+ let type_voie = code.chars().nth(6).unwrap();
+ let type_voie = if type_voie.is_alphabetic() {
+ type_voie as u32 - 55
+ } else {
+ type_voie.to_digit(10).unwrap()
+ };
+ let numero_identifiant_communal_voie: i32 = code[7..].parse().unwrap();
+
+ let index = (part_commune * 19 + type_voie as i32 * 11 + numero_identifiant_communal_voie) % 23;
+ return RIVOLI_STRING[index as usize];
+}
+
#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
@@ -166,7 +266,7 @@
#[test]
fn test_parse_fantoir_date() {
let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
- let actual = parse_fantoir_date("1987001");
+ let actual = parse_fantoir_date("1987001").unwrap();
assert_eq!(expected, actual);
}
@@ -189,4 +289,44 @@
fn test_parse_optional_string_when_only_spaces() {
assert_eq!(true, parse_optional_string(" ").is_none());
}
+
+ #[test]
+ pub fn test_fix_fantoir_code () {
+ assert_fixed_fantoir_code("755112P144L", fix_fantoir_code("755112P144L"));
+ assert_fixed_fantoir_code("972231B026U", fix_fantoir_code("97231_B026"));
+ assert_fixed_fantoir_code("4401090516U", fix_fantoir_code("441090516U"));
+ assert_fixed_fantoir_code("972222B305L", fix_fantoir_code("972222B305"));
+ }
+
+ fn assert_fixed_fantoir_code (expected: &str, actual: FixedFantoirCode) {
+ match actual {
+ FixedFantoirCode::Computed(code) => {
+ assert_eq!(expected, &code);
+ },
+ _ => assert!(false, "Expected a computed FANTOIR code")
+ }
+ }
+
+ #[test]
+ pub fn test_fix_fantoir_code_when_it_cannot_be_computed () {
+ let expected = FixedFantoirCode::ToSearch {
+ code_insee: "92002".to_string(),
+ identifiant_communal_voie: "5130".to_string()
+ };
+
+ assert_eq!(expected, fix_fantoir_code("920025130X"), "As code direction can't be computed, this code should be to search");
+ assert_eq!(expected, fix_fantoir_code("92002_5130"), "As code direction can't be computed, this code should be to search");
+ }
+
+
+ #[test]
+ pub fn test_compute_rivoli_key() {
+ assert_eq!('W', compute_rivoli_key("380003B001"));
+ assert_eq!('U', compute_rivoli_key("972231B026"));
+ }
+
+ #[test]
+ pub fn test_compute_rivoli_key_with_type_voie_zero() {
+ assert_eq!('C', compute_rivoli_key("9722230261"));
+ }
}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,7 @@
mod db;
mod fantoir;
mod services;
+mod sparql;
#[derive(Debug, Parser)]
#[command(name = "fantoir-datasource")]
@@ -21,8 +22,10 @@
#[command(arg_required_else_help = true)]
Promote(PromoteArgs),
+ /// Query Wikidata SPARQL end-point to enrich FANTOIR information
+ Wikidata(WikidataArgs),
+
/// Query the imported FANTOIR table
- #[command(arg_required_else_help = true)]
Query(QueryArgs)
}
@@ -50,6 +53,18 @@
fantoir_table: String,
}
+#[derive(Debug, Args)]
+pub struct WikidataArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+}
+
#[derive(Debug, Args)]
#[clap(trailing_var_arg=true)]
pub struct QueryArgs {
@@ -79,6 +94,9 @@
FantoirCommand::Promote(args) => {
promote(&args.fantoir_table, &database_url).await;
},
+ FantoirCommand::Wikidata(args) => {
+ commands::wikidata::import(&args, &database_url).await
+ },
FantoirCommand::Query(args) => {
commands::query::search(args, &database_url).await
},
diff --git a/src/queries/wikidata.sparql b/src/queries/wikidata.sparql
new file mode 100644
--- /dev/null
+++ b/src/queries/wikidata.sparql
@@ -0,0 +1,12 @@
+PREFIX bd: <http://www.bigdata.com/rdf#>
+PREFIX wikibase: <http://wikiba.se/ontology#>
+PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+
+# Streets with FANTOIR code
+SELECT DISTINCT ?code_fantoir ?item ?itemLabel ?what
+WHERE
+{
+ ?item wdt:P3182 ?code_fantoir .
+ ?item wdt:P31 ?what
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
+}
diff --git a/src/schema/promote/wikidata.sql b/src/schema/promote/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/promote/wikidata.sql
@@ -0,0 +1,7 @@
+alter table fantoir_wikidata
+ drop constraint if exists fantoir_wikidata_code_fantoir_fk;
+
+
+alter table fantoir_wikidata
+ add constraint fantoir_wikidata_code_fantoir_fk
+ foreign key (code_fantoir) references /*table*/fantoir (code_fantoir);
diff --git a/src/schema/wikidata.sql b/src/schema/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/wikidata.sql
@@ -0,0 +1,29 @@
+-- This table matches Wikidata entities and FANTOIR codes.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir_wikidata
+(
+ -- Identifiers
+ code_fantoir char(11) NOT NULL
+ constraint /*index*/index_fantoir_wikidata_pk
+ primary key,
+ code_fantoir_wikidata char(11) NOT NULL,
+
+ -- Wikidata information
+ item varchar(12) NOT NULL,
+ item_label text,
+ what varchar(12) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir_wikidata)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_wikidata_voie_trigram
+ ON /*table*/fantoir_wikidata
+ USING gin (item_label gin_trgm_ops);
diff --git a/src/services/http_client.rs b/src/services/http_client.rs
new file mode 100644
--- /dev/null
+++ b/src/services/http_client.rs
@@ -0,0 +1,12 @@
+use lazy_static::lazy_static;
+
+lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+pub fn get_user_agent () -> &'static str {
+ &USER_AGENT
+}
diff --git a/src/services/mod.rs b/src/services/mod.rs
--- a/src/services/mod.rs
+++ b/src/services/mod.rs
@@ -1 +1,2 @@
pub mod query;
+pub mod http_client;
diff --git a/src/sparql.rs b/src/sparql.rs
new file mode 100644
--- /dev/null
+++ b/src/sparql.rs
@@ -0,0 +1,119 @@
+//! # SPARQL client
+
+use std::collections::HashMap;
+use std::env;
+use oxrdf::Term;
+
+use reqwest::{ClientBuilder, Url};
+use reqwest::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
+
+static DEFAULT_USER_AGENT: &str = concat!(
+ env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),
+);
+
+pub struct Client {
+ pub endpoint: String,
+ client: HttpClient,
+}
+
+impl Client {
+ pub fn new (endpoint: &str, user_agent: &str) -> Self {
+ let client = ClientBuilder::new()
+ .user_agent(user_agent)
+ .default_headers( {
+ let mut headers = HeaderMap::new();
+                headers.insert("Accept", HeaderValue::from_static("application/sparql-results+xml"));
+ headers
+ })
+ .gzip(true)
+ .deflate(true)
+ .build()
+ .expect("Can't build HTTP client");
+
+ Self {
+ endpoint: String::from(endpoint),
+ client,
+ }
+ }
+
+ pub fn with_default_user_agent(endpoint: &str) -> Self {
+ let user_agent = Self::determine_user_agent();
+
+ Self::new(endpoint, &user_agent)
+ }
+
+ fn determine_user_agent () -> String {
+ env::current_exe()
+ .ok()
+ .and_then(|path| path.file_name().map(|s| s.to_os_string()))
+ .and_then(|program_name| program_name.into_string().ok())
+ .unwrap_or(String::from(DEFAULT_USER_AGENT))
+ }
+
+ pub async fn query (&self, query: &str) -> Vec<HashMap<String, Term>> {
+        // Query the SPARQL endpoint through the configured HTTP client.
+        let url = Url::parse_with_params(&self.endpoint, &[("query", query)])
+            .expect("Can't parse endpoint as absolute URL.");
+
+        let result = self.client.get(url)
+            .send()
+            .await
+            .expect("Can't query endpoint")
+            .text()
+            .await
+            .expect("End-point didn't return a reply.");
+
+ let mut entries = Vec::new();
+
+ if let QueryResultsReader::Solutions(solutions) = QueryResultsParser
+ ::from_format(QueryResultsFormat::Xml)
+ .read_results(result.as_bytes())
+ .expect("Can't read Wikidata reply")
+ {
+ for solution in solutions {
+ let entry: HashMap<_, _> = solution
+ .expect("Can't read solution")
+ .iter()
+ .map(|(variable, term)| (
+ variable.as_str().to_string(),
+ term.clone(),
+ ))
+ .collect();
+ entries.push(entry);
+ }
+ } else {
+ panic!("Can't parse SPARQL result as a solution.");
+ }
+
+ entries
+ }
+}
+
+pub fn parse_term_uri (term: &Term) -> Option<String> {
+ if let Term::NamedNode(node) = term {
+ Some(node.as_str().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn parse_literal (term: &Term) -> Option<String> {
+ if let Term::Literal(literal) = term {
+ Some(literal.value().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn is_term_empty(term: &Term) -> bool {
+ match term {
+ Term::NamedNode(node) => {
+ // Special values IRI are considered as empty values.
+ node.as_str().contains("/.well-known/genid/")
+ }
+ Term::BlankNode(_) => true,
+ Term::Literal(_) => false,
+ }
+}

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 1, 14:36 (20 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2168587
Default Alt Text
D2731.id6937.diff (30 KB)

Event Timeline