Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3912547
D2731.id6945.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
31 KB
Referenced Files
None
Subscribers
None
D2731.id6945.diff
View Options
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,9 @@
license = "BSD-2-Clause"
[dependencies]
+lazy_static = "~1.4.0"
+oxrdf = "~0.1.1"
+sparesults = "~0.1.3"
[dependencies.async-scoped]
version = "~0.7.1"
@@ -18,6 +21,10 @@
version = "~4.0.32"
features = ["derive"]
+[dependencies.reqwest]
+version = "~0.11.13"
+features = ["gzip", "deflate"]
+
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/src/commands/import.rs b/src/commands/import.rs
--- a/src/commands/import.rs
+++ b/src/commands/import.rs
@@ -12,12 +12,33 @@
use crate::db::*;
use crate::fantoir::FantoirEntry;
+impl ToTableInitializationArgs for &ImportArgs {
+ fn to_table_initialization_args (&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: self.fantoir_table.clone(),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
+
pub async fn import(args: &ImportArgs, database_url: &str) {
let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
let pool = connect_to_db(database_url).await;
// Create/truncate table as needed and as allowed by options
- if let Err(error) = initialize_table(args, &pool).await {
+ let callback = async {
+ create_table(&pool, &args.fantoir_table).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
eprintln!("{}", &error);
exit(1);
}
@@ -46,39 +67,3 @@
.await
}
}
-
-async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
- if is_table_exists(pool, &args.fantoir_table).await {
- if is_table_empty(&pool, &args.fantoir_table).await {
- return Ok(());
- }
-
- if args.overwrite_table {
- truncate_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- return Err(format!(
- "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
- &args.fantoir_table
- ));
- }
-
- if args.create_table {
- create_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- Err(format!(
- "Table {} doesn't exist. To create it, run the import tool with -c option.",
- &args.fantoir_table
- ))
-}
-
-async fn create_table(pool: &PgPool, table: &str) {
- let queries = include_str!("../schema/fantoir.sql")
- .replace("/*table*/fantoir", table)
- .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
-
- run_multiple_queries(pool, &queries).await;
-}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -3,3 +3,4 @@
pub(crate) mod import;
pub(crate) mod promote;
pub(crate) mod query;
+pub(crate) mod wikidata;
diff --git a/src/commands/promote/mod.rs b/src/commands/promote/mod.rs
--- a/src/commands/promote/mod.rs
+++ b/src/commands/promote/mod.rs
@@ -1,23 +1,28 @@
//! Command to promote a table as the one to use.
use sqlx::PgPool;
-use crate::db::{connect_to_db, run_multiple_queries_groups};
+use crate::commands::wikidata::WIKIDATA_TABLE;
+use crate::db::*;
/// Promotes a FANTOIR table as the relevant version to use
pub async fn promote (fantoir_table: &str, database_url: &str) {
let pool = connect_to_db(database_url).await;
- let queries_groups = get_queries_groups(&pool, fantoir_table);
+ let queries_groups = get_queries_groups(&pool, fantoir_table).await;
run_multiple_queries_groups(&pool, &queries_groups);
}
/// Determines the groups of queries needed for promotion
-fn get_queries_groups (pool: &PgPool, fantoir_table: &str) -> Vec<String> {
+async fn get_queries_groups (pool: &PgPool, fantoir_table: &str) -> Vec<String> {
let mut queries_groups = vec![
include_str!("../../schema/promote/config.sql"),
include_str!("../../schema/promote/fantoir_view.sql"),
];
+ if is_table_exists(pool, WIKIDATA_TABLE).await {
+ queries_groups.push(include_str!("../../schema/promote/wikidata.sql"));
+ }
+
queries_groups
.into_iter()
.map(|queries| queries
diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/mod.rs
@@ -0,0 +1,213 @@
+//! Query Wikidata SPARQL end-point and import result into PostgreSQL
+
+mod qualification;
+
+use std::collections::HashMap;
+use std::process::exit;
+use oxrdf::Term;
+use sqlx::PgPool;
+
+use crate::db::*;
+use crate::{sparql, WikidataArgs};
+use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
+use crate::services::query::search_fantoir_code;
+use crate::sparql::{is_term_empty, parse_literal, parse_term_uri};
+
+pub static WIKIDATA_TABLE: &str = "fantoir_wikidata";
+
+/* -------------------------------------------------------------
+ Import task
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub async fn import (args: &WikidataArgs, database_url: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ let callback = async {
+ let queries = include_str!("../../schema/wikidata.sql");
+ run_multiple_queries(&pool, &queries).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Query Wikidata
+ let client = sparql::Client::new("https://query.wikidata.org/sparql");
+
+ let mut what_map = HashMap::new();
+ client.query(include_str!("../../queries/wikidata.sparql"))
+ .await
+ .iter()
+ .filter(|entry| !is_term_empty(&entry["code_fantoir"]))
+ .for_each(|entry| {
+ // Build a map of the different P31 (instance of) values for a specified code.
+
+ let key = WikidataEntryKey::parse(entry);
+ let what = parse_wikidata_entity_uri(&entry["what"]).expect("Can't parse P31 what result");
+
+ what_map
+ .entry(key)
+ .or_insert(Vec::new())
+ .push(what);
+ });
+
+ // Consolidate entries and insert them into the database.
+ // To avoid an async closure, we don't use HOF pattern.
+ for (key, candidates) in what_map {
+ if let Some(entry) = WikidataEntry::consolidate_set(&pool, &key, candidates).await {
+ entry.insert_to_db(&pool).await;
+ continue;
+ }
+
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", &key);
+ eprintln!("Can't resolve FANTOIR code.");
+ }
+
+}
+
+/* -------------------------------------------------------------
+ Arguments parsing
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+impl ToTableInitializationArgs for &WikidataArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: String::from(WIKIDATA_TABLE),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata entry structures
+
+ WikidataEntry represents the data ready to be inserted
+ in our database.
+
+ WikidataEntryKey is a subset of WikidataEntry to identify
+ a set (FANTOIR code, Wikidata item) to be used as HashMap key
+ when a SPARQL query returns several rows for such set.
+
+ For example, here, we ask for P31 values, and if a Wikidata
+ entity offers several P31 values, we'll get one row per value.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Debug, Clone)]
+struct WikidataEntry {
+ code_fantoir: String,
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+ what: String,
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+struct WikidataEntryKey {
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+}
+
+impl WikidataEntryKey {
+ fn parse(entry: &HashMap<String, Term>) -> Self {
+ Self {
+ code_fantoir_wikidata: parse_literal(&entry["code_fantoir"]).expect("Can't parse code"),
+ item: parse_wikidata_entity_uri(&entry["item"]).expect("Can't parse item"),
+ item_label: parse_literal(&entry["itemLabel"]).expect("Can't parse item label"),
+ }
+ }
+}
+
+impl WikidataEntry {
+ async fn consolidate_set(pool: &PgPool, key: &WikidataEntryKey, what_candidates: Vec<String>) -> Option<Self> {
+ let what = determine_p31_winner(&what_candidates);
+
+ let code_fantoir = match fix_fantoir_code(&key.code_fantoir_wikidata) {
+ FixedFantoirCode::Computed(code) => code,
+ FixedFantoirCode::ToSearch { code_insee, identifiant_communal_voie } => {
+ search_fantoir_code(pool, &code_insee, &identifiant_communal_voie).await?
+ }
+ };
+
+ Some(Self {
+ code_fantoir,
+ code_fantoir_wikidata: key.code_fantoir_wikidata.clone(),
+ item: key.item.clone(),
+ item_label: key.item_label.clone(),
+ what,
+ })
+ }
+
+ async fn insert_to_db (&self, pool: &PgPool) {
+ let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
+ query.push_str(
+ r#"
+ (code_fantoir, code_fantoir_wikidata, item, item_label, what)
+ VALUES
+ ($1, $2, $3, $4, $5)"#
+ );
+
+ if let Err(error) = sqlx::query(&query)
+ .bind(&self.code_fantoir)
+ .bind(&self.code_fantoir_wikidata)
+ .bind(&self.item)
+ .bind(&self.item_label)
+ .bind(&self.what)
+
+ .execute(pool)
+ .await {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", self);
+ eprintln!("{}", error);
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Parses a Wikidata entity URI from a RDF term.
+///
+/// For example, to parse a term representing Q1234:
+///
+/// ```
+/// let term = Term::NamedNode(
+/// NamedNode::new("http://www.wikidata.org/entity/Q1234").unwrap()
+/// );
+/// let entity = parse_wikidata_entity_uri(&term).unwrap();
+///
+/// assert_eq!("Q1234", &entity);
+/// ```
+pub fn parse_wikidata_entity_uri (term: &Term) -> Option<String> {
+ parse_term_uri(term)
+ .map(|uri| {
+ let pos = uri.rfind('/').expect("URI doesn't contain any /") + 1;
+
+ uri[pos..].to_string()
+ })
+}
+
+/* -------------------------------------------------------------
+ Tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+ use oxrdf::NamedNode;
+ use super::*;
+
+ #[test]
+ pub fn test_parse_wikidata_entity_uri () {
+ let node = NamedNode::new("http://www.wikidata.org/entity/Q849777").unwrap();
+ let term = Term::NamedNode(node);
+
+ assert_eq!("Q849777", &parse_wikidata_entity_uri(&term).unwrap());
+ }
+}
diff --git a/src/commands/wikidata/qualification.rs b/src/commands/wikidata/qualification.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/qualification.rs
@@ -0,0 +1,96 @@
+//! Helper for items qualification.
+//!
+//! Wikidata uses the P31 "instance of" property to qualify items,
+//! which is helpful to identify voies, especially the pseudo-voies
+//! not furthermore described in FANTOIR.
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+ static ref P31_WINNERS: Vec<&'static str> = vec![
+ // Important values
+
+ "Q928830", // metro station
+ "Q18615527", // tram bridge
+ "Q1793804", // station de RER
+ "Q55488", // gare ferroviaire
+ "Q55485", // gare ferroviaire en cul-de-sac
+
+ "Q510662", // ring road
+ "Q2376564", // échangeur autoroutier
+
+ // Less important values, as probably already qualified by FANTOIR
+
+ "Q3558430", // villa, a name used for Paris private roads
+ "Q15070223", // cité, same thing
+
+ "Q207934", // allée
+ "Q54114", // boulevard
+ "Q99228502", // avenue (a road called avenue, not matching the avenue concept)
+ "Q7543083", // avenue (a true one)
+ "Q283977", // parvis
+ "Q174782", // place
+ "Q164419", // galerie
+
+    "Q12731", // impasse, should lose against avenue (some Paris avenues are so qualified)
+ "Q13634881", // passage
+ "Q1251403", // ruelle
+ "Q3840711", // quai
+ "Q88372", // esplanade, should win against jardin public
+ "Q787113", // promenade
+ "Q17383262", // cour
+ "Q1068842", // passerelle
+ "Q641406", // terrasse
+ "Q16634966", // escalier
+ "Q628179", // sentier
+ "Q5004679", // chemin
+ "Q3352369", // chemin piétonnier
+
+ "Q1529", // rond-point
+ "Q1525", // carrefour giratoire
+
+ "Q4421", // forêt, used for bois de Boulogne, bois de Vincennes
+ "Q22698", // parc
+ "Q2026833", // square, type jardin public
+ "Q22746", // jardin public
+ "Q3215290", // lac artificiel
+
+    "Q12280", // pont, should lose against place (large places at Paris are also bridges)
+ "Q158438", // pont en arc
+ "Q537127", // pont routier
+ "Q1440300", // tour d'observation
+
+ "Q16560", // palais
+ "Q2080521", // halle
+ "Q16917", // hôpital
+
+ // Those values are probably too generic, so they're kept in last
+
+ "Q1302778", // voie rapide
+ "Q79007", // street, wins against road but loses against boulevard
+ "Q83620", // voie de communication
+ ];
+}
+
+/// Determine amongst a sets of items which one is the more relevant
+/// to describe a pseudo-voie.
+///
+/// This is useful when a Wikidata entity has several values for P31
+/// to decide which one is the most interesting to keep in our context.
+pub fn determine_p31_winner(candidates: &Vec<String>) -> String {
+ if candidates.len() == 1 {
+ // If there is only one, that's the one to use.
+ return candidates[0].clone();
+ }
+
+ for winner_candidate in P31_WINNERS.iter() {
+ for actual_candidate in candidates {
+ if winner_candidate == actual_candidate {
+ return actual_candidate.clone();
+ }
+ }
+ }
+
+ eprintln!("Can't determine P31 winner amongst {:?}, {} is picked.", candidates, candidates[0]);
+ candidates[0].clone()
+}
diff --git a/src/db.rs b/src/db.rs
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,12 +3,24 @@
//! This module provides helpers to interact with a PostgreSQL database.
//! Functions expect to work with an executor from sqlx crate.
+use std::future::Future;
+
use async_scoped::TokioScope;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
static QUERIES_SEPARATOR: &str = "\n\n\n";
+pub struct TableInitializationArgs {
+ pub table_name: String,
+ pub create_table: bool,
+ pub overwrite_table: bool,
+}
+
+pub trait ToTableInitializationArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs;
+}
+
pub async fn connect_to_db (database_url: &str) -> PgPool {
PgPoolOptions::new()
.max_connections(3)
@@ -62,6 +74,41 @@
.expect("Can't truncate table.");
}
+pub async fn initialize_table<F, T> (
+ pool: &PgPool,
+ callback: F,
+ args: T
+) -> Result<(), String>
+ where F: Future, T: ToTableInitializationArgs
+{
+ let args = args.to_table_initialization_args();
+ if is_table_exists(pool, &args.table_name).await {
+ if is_table_empty(&pool, &args.table_name).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.table_name).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.table_name
+ ));
+ }
+
+ if args.create_table {
+ callback.await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.table_name
+ ))
+}
+
pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
for query in queries.split(QUERIES_SEPARATOR) {
sqlx::query(&query)
diff --git a/src/fantoir.rs b/src/fantoir.rs
--- a/src/fantoir.rs
+++ b/src/fantoir.rs
@@ -3,9 +3,20 @@
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
+use lazy_static::lazy_static;
use sqlx::PgPool;
use sqlx::types::chrono::NaiveDate;
+lazy_static! {
+ static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
+
+ /// The alphabet without I O and Q.
+ static ref RIVOLI_STRING: Vec<char> = vec![
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
+ 'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
+ ];
+}
+
/// A voie in the FANTOIR database
#[derive(Debug)]
pub struct FantoirEntry {
@@ -158,6 +169,95 @@
}
}
+/// A fixed FANTOIR code result
+#[derive(Debug, Eq, PartialEq)]
+pub enum FixedFantoirCode {
+ /// The code has been fully computed
+ Computed(String),
+
+ /// Information needed to query the code has been extracted, but code direction is unknown
+ /// Such result can be queried through search_code_fantoir()
+ ToSearch { code_insee: String, identifiant_communal_voie: String },
+}
+
+/// Transforms FANTOIR code from BAN into regular FANTOIR codes.
+/// BAN sometimes uses <insee code>_<identifiant voie commune> without Rivoli key.
+pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode {
+ let mut code = code.to_string();
+
+ if code.contains("_") {
+ // 97231_B026 -> 972231B026
+ code = if code.starts_with("97") {
+ // Code direction = department last digit
+ format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..])
+ } else if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[6..10].to_string(),
+ }
+ } else {
+ // Code direction = 0
+ format!("{}0{}{}", &code[0..=2], &code[3..5], &code[6..])
+ };
+ }
+
+ if code.len() == 10 {
+ let last_char = code.chars().last().unwrap();
+
+ match last_char {
+ '0'..='9' => {
+ code.push(compute_rivoli_key(&code));
+ }
+
+ 'A'..='Z' => {
+ // 441090516U -> 4401090516U
+ code = if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ // 920514135A -> 92051 4135
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[5..9].to_string(),
+ }
+ } else {
+ format!("{}0{}", &code[0..2], &code[2..])
+ };
+ }
+
+ _ => unreachable!(),
+ }
+ }
+
+ FixedFantoirCode::Computed(code)
+}
+
+pub fn uses_specific_code_direction (code: &str) -> bool {
+ DEPARTMENTS_WITH_CODE_DIRECTION
+ .iter()
+ .any(|&dpt| code.starts_with(dpt))
+}
+
+pub fn compute_rivoli_key (code: &str) -> char {
+ // See https://georezo.net/forum/viewtopic.php?id=102292
+
+ if code.starts_with("2A") || code.starts_with("2B") {
+ // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19?
+ unimplemented!()
+ }
+
+ let part_commune: i32 = code[0..6].parse().unwrap();
+ let type_voie = code.chars().nth(6).unwrap();
+ let type_voie = if type_voie.is_alphabetic() {
+ type_voie as u32 - 55
+ } else {
+ type_voie.to_digit(10).unwrap()
+ };
+ let numero_identifiant_communal_voie: i32 = code[7..].parse().unwrap();
+
+ let index = (part_commune * 19 + type_voie as i32 * 11 + numero_identifiant_communal_voie) % 23;
+ return RIVOLI_STRING[index as usize];
+}
+
#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
@@ -166,7 +266,7 @@
#[test]
fn test_parse_fantoir_date() {
let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
- let actual = parse_fantoir_date("1987001");
+ let actual = parse_fantoir_date("1987001").unwrap();
assert_eq!(expected, actual);
}
@@ -189,4 +289,44 @@
fn test_parse_optional_string_when_only_spaces() {
assert_eq!(true, parse_optional_string(" ").is_none());
}
+
+ #[test]
+ pub fn test_fix_fantoir_code () {
+ assert_fixed_fantoir_code("755112P144L", fix_fantoir_code("755112P144L"));
+ assert_fixed_fantoir_code("972231B026U", fix_fantoir_code("97231_B026"));
+ assert_fixed_fantoir_code("4401090516U", fix_fantoir_code("441090516U"));
+ assert_fixed_fantoir_code("972222B305L", fix_fantoir_code("972222B305"));
+ }
+
+ fn assert_fixed_fantoir_code (expected: &str, actual: FixedFantoirCode) {
+ match actual {
+ FixedFantoirCode::Computed(code) => {
+ assert_eq!(expected, &code);
+ },
+ _ => assert!(false, "Expected a computed FANTOIR code")
+ }
+ }
+
+ #[test]
+ pub fn test_fix_fantoir_code_when_it_cannot_be_computed () {
+ let expected = FixedFantoirCode::ToSearch {
+ code_insee: "92002".to_string(),
+ identifiant_communal_voie: "5130".to_string()
+ };
+
+ assert_eq!(expected, fix_fantoir_code("920025130X"), "As code direction can't be computed, this code should be to search");
+ assert_eq!(expected, fix_fantoir_code("92002_5130"), "As code direction can't be computed, this code should be to search");
+ }
+
+
+ #[test]
+ pub fn test_compute_rivoli_key() {
+ assert_eq!('W', compute_rivoli_key("380003B001"));
+ assert_eq!('U', compute_rivoli_key("972231B026"));
+ }
+
+ #[test]
+ pub fn test_compute_rivoli_key_with_type_voie_zero() {
+ assert_eq!('C', compute_rivoli_key("9722230261"));
+ }
}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,7 @@
mod db;
mod fantoir;
mod services;
+mod sparql;
#[derive(Debug, Parser)]
#[command(name = "fantoir-datasource")]
@@ -21,8 +22,10 @@
#[command(arg_required_else_help = true)]
Promote(PromoteArgs),
+ /// Query Wikidata SPARQL end-point to enrich FANTOIR information
+ Wikidata(WikidataArgs),
+
/// Query the imported FANTOIR table
- #[command(arg_required_else_help = true)]
Query(QueryArgs)
}
@@ -50,6 +53,18 @@
fantoir_table: String,
}
+#[derive(Debug, Args)]
+pub struct WikidataArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+}
+
#[derive(Debug, Args)]
#[clap(trailing_var_arg=true)]
pub struct QueryArgs {
@@ -79,6 +94,9 @@
FantoirCommand::Promote(args) => {
promote(&args.fantoir_table, &database_url).await;
},
+ FantoirCommand::Wikidata(args) => {
+ commands::wikidata::import(&args, &database_url).await
+ },
FantoirCommand::Query(args) => {
commands::query::search(args, &database_url).await
},
diff --git a/src/queries/wikidata.sparql b/src/queries/wikidata.sparql
new file mode 100644
--- /dev/null
+++ b/src/queries/wikidata.sparql
@@ -0,0 +1,12 @@
+PREFIX bd: <http://www.bigdata.com/rdf#>
+PREFIX wikibase: <http://wikiba.se/ontology#>
+PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+
+# Streets with FANTOIR code
+SELECT DISTINCT ?code_fantoir ?item ?itemLabel ?what
+WHERE
+{
+ ?item wdt:P3182 ?code_fantoir .
+ ?item wdt:P31 ?what
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
+}
diff --git a/src/schema/promote/wikidata.sql b/src/schema/promote/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/promote/wikidata.sql
@@ -0,0 +1,7 @@
+alter table fantoir_wikidata
+ drop constraint if exists fantoir_wikidata_code_fantoir_fk;
+
+
+alter table fantoir_wikidata
+ add constraint fantoir_wikidata_code_fantoir_fk
+ foreign key (code_fantoir) references /*table*/fantoir (code_fantoir);
diff --git a/src/schema/wikidata.sql b/src/schema/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/wikidata.sql
@@ -0,0 +1,29 @@
+-- This table matches Wikidata entities and FANTOIR codes.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir_wikidata
+(
+ -- Identifiers
+ code_fantoir char(11) NOT NULL
+ constraint /*index*/index_fantoir_wikidata_pk
+ primary key,
+ code_fantoir_wikidata char(11) NOT NULL,
+
+ -- Wikidata information
+ item varchar(12) NOT NULL,
+ item_label text,
+ what varchar(12) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir_wikidata)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_wikidata_voie_trigram
+ ON /*table*/fantoir_wikidata
+ USING gin (item_label gin_trgm_ops);
diff --git a/src/services/http_client.rs b/src/services/http_client.rs
new file mode 100644
--- /dev/null
+++ b/src/services/http_client.rs
@@ -0,0 +1,56 @@
+use lazy_static::lazy_static;
+
+use reqwest::{Client as ReqwestClient, ClientBuilder, Error, IntoUrl, Response};
+use reqwest::header::HeaderMap;
+
+/* -------------------------------------------------------------
+ User agent
+
+ The USER_AGENT variable is computed at build time.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+pub fn get_user_agent () -> &'static str {
+ &USER_AGENT
+}
+
+/* -------------------------------------------------------------
+ HTTP client
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub struct Client {
+ client: ReqwestClient,
+}
+
+impl Client {
+ pub fn new(headers: Option<HeaderMap>) -> Self {
+ let headers = headers
+ .unwrap_or(HeaderMap::new());
+
+ let client = ClientBuilder::new()
+ .user_agent(get_user_agent())
+ .default_headers(headers)
+ .gzip(true)
+ .deflate(true)
+ .build()
+ .expect("Can't build HTTP client");
+
+ Self {
+ client,
+ }
+ }
+
+ pub async fn get<T>(&self, url: T) -> Result<Response, Error>
+ where T: IntoUrl {
+ self.client
+ .get(url)
+ .send()
+ .await
+ }
+}
diff --git a/src/services/mod.rs b/src/services/mod.rs
--- a/src/services/mod.rs
+++ b/src/services/mod.rs
@@ -1 +1,2 @@
pub mod query;
+pub mod http_client;
diff --git a/src/sparql.rs b/src/sparql.rs
new file mode 100644
--- /dev/null
+++ b/src/sparql.rs
@@ -0,0 +1,108 @@
+//! # SPARQL client
+
+use std::collections::HashMap;
+use std::io::BufRead;
+
+use oxrdf::Term;
+use crate::services::http_client::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+use reqwest::Url;
+use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader, QuerySolution, SolutionsReader};
+
+type SparqlResult = HashMap<String, Term>;
+
+pub struct Client {
+ pub endpoint: String,
+ http_client: HttpClient,
+}
+
+impl Client {
+ pub fn new (endpoint: &str) -> Self {
+ let mut headers = HeaderMap::new();
+ headers.insert("Accept", HeaderValue::from_static("Accept: application/sparql-results+xml"));
+
+ Self {
+ endpoint: String::from(endpoint),
+ http_client: HttpClient::new(Some(headers)),
+ }
+ }
+
+ pub async fn query (&self, query: &str) -> Vec<SparqlResult> {
+ let url = Url::parse_with_params(&self.endpoint, &[("query", query)])
+ .expect("Can't parse endpoint as absolute URL.");
+
+ let query_results = self.http_client.get(url).await
+ .expect("Can't query endpoint")
+ .text().await
+ .expect("End-point didn't return a reply.");
+
+ parse_sparql_solutions_results(&query_results)
+ .expect("Can't parse SPARQL result as a solution.")
+ }
+}
+
+pub fn parse_sparql_solutions_results (query_results: &str) -> Option<Vec<SparqlResult>> {
+ let results_reader = get_query_results_xml_reader(query_results.as_bytes());
+
+ match results_reader {
+ QueryResultsReader::Solutions(solutions) => Some(parse_sparql_solutions(solutions)),
+ QueryResultsReader::Boolean(_) => None,
+ }
+}
+
+fn get_query_results_xml_reader<T>(reader: T) -> QueryResultsReader<T>
+ where T: BufRead
+{
+ QueryResultsParser::from_format(QueryResultsFormat::Xml)
+ .read_results(reader)
+ .expect("Can't read SPARQL results")
+}
+
+fn parse_sparql_solutions<T> (solutions: SolutionsReader<T>) -> Vec<SparqlResult>
+ where T: BufRead
+{
+ solutions
+ .map(|solution| {
+ parse_sparql_result(
+ solution.expect("Can't read solution")
+ )
+ })
+ .collect()
+}
+
+pub fn parse_sparql_result (solution: QuerySolution) -> SparqlResult {
+ solution
+ .iter()
+ .map(|(variable, term)| (
+ variable.as_str().to_string(),
+ term.clone(),
+ ))
+ .collect()
+}
+
+pub fn parse_term_uri (term: &Term) -> Option<String> {
+ if let Term::NamedNode(node) = term {
+ Some(node.as_str().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn parse_literal (term: &Term) -> Option<String> {
+ if let Term::Literal(literal) = term {
+ Some(literal.value().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn is_term_empty(term: &Term) -> bool {
+ match term {
+ Term::NamedNode(node) => {
+ // Special values IRI are considered as empty values.
+ node.as_str().contains("/.well-known/genid/")
+ }
+ Term::BlankNode(_) => true,
+ Term::Literal(_) => false,
+ }
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Dec 20, 06:10 (20 h, 56 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2307646
Default Alt Text
D2731.id6945.diff (31 KB)
Attached To
Mode
D2731: Query Wikidata to enrich FANTOIR file
Attached
Detach File
Event Timeline
Log In to Comment