Page MenuHomeDevCentral

D2731.id6922.diff
No OneTemporary

D2731.id6922.diff

diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,11 +9,18 @@
license = "BSD-2-Clause"
[dependencies]
+lazy_static = "~1.4.0"
+oxrdf = "~0.1.1"
+sparesults = "~0.1.3"
[dependencies.clap]
version = "~4.0.32"
features = ["derive"]
+[dependencies.reqwest]
+version = "~0.11.13"
+features = ["gzip", "deflate"]
+
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/src/commands/import.rs b/src/commands/import.rs
--- a/src/commands/import.rs
+++ b/src/commands/import.rs
@@ -12,12 +12,33 @@
use crate::db::*;
use crate::fantoir::FantoirEntry;
+impl ToTableInitializationArgs for &ImportArgs {
+ fn to_table_initialization_args (&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: self.fantoir_table.clone(),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
+
pub async fn import(args: &ImportArgs, database_url: &str) {
let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
let pool = connect_to_db(database_url).await;
// Create/truncate table as needed and as allowed by options
- if let Err(error) = initialize_table(args, &pool).await {
+ let callback = async {
+ create_table(&pool, &args.fantoir_table).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
eprintln!("{}", &error);
exit(1);
}
@@ -46,39 +67,3 @@
.await
}
}
-
-async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
- if is_table_exists(pool, &args.fantoir_table).await {
- if is_table_empty(&pool, &args.fantoir_table).await {
- return Ok(());
- }
-
- if args.overwrite_table {
- truncate_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- return Err(format!(
- "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
- &args.fantoir_table
- ));
- }
-
- if args.create_table {
- create_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- Err(format!(
- "Table {} doesn't exist. To create it, run the import tool with -c option.",
- &args.fantoir_table
- ))
-}
-
-async fn create_table(pool: &PgPool, table: &str) {
- let queries = include_str!("../schema/fantoir.sql")
- .replace("/*table*/fantoir", table)
- .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
-
- run_multiple_queries(pool, &queries).await;
-}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -1,3 +1,5 @@
//! Commands for the fantoir2db tool.
pub(crate) mod import;
+pub(crate) mod wikidata;
+pub(crate) mod update_foreign_keys;
diff --git a/src/commands/update_foreign_keys.rs b/src/commands/update_foreign_keys.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/update_foreign_keys.rs
@@ -0,0 +1,12 @@
+//! Update foreign keys relations between tables
+
+use crate::db::*;
+
+pub async fn update (database_url: &str, fantoir_table: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ let queries = include_str!("../schema/foreign_keys.sql")
+ .replace("/*table*/fantoir", fantoir_table);
+
+ run_multiple_queries(&pool, &queries).await;
+}
diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/mod.rs
@@ -0,0 +1,229 @@
+//! Query Wikidata SPARQL end-point and import result into PostgreSQL
+
+mod qualification;
+
+use std::collections::HashMap;
+use std::process::exit;
+use oxrdf::Term;
+use sqlx::{Execute, PgPool};
+
+use crate::db::*;
+use crate::{sparql, WikidataArgs};
+use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
+use crate::services::http_client::get_user_agent;
+use crate::sparql::{is_term_empty, parse_literal, parse_term_uri};
+
+static WIKIDATA_TABLE: &str = "fantoir_wikidata";
+
+/* -------------------------------------------------------------
+ Import task
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub async fn import (args: &WikidataArgs, database_url: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ let callback = async {
+ let queries = include_str!("../../schema/wikidata.sql");
+ run_multiple_queries(&pool, &queries).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Query Wikidata
+ let client = sparql::Client::new("https://query.wikidata.org/sparql", get_user_agent());
+
+ let mut what_map = HashMap::new();
+ client.query(include_str!("../../queries/wikidata.sparql"))
+ .await
+ .iter()
+ .filter(|entry| !is_term_empty(&entry["code_fantoir"]))
+ .for_each(|entry| {
+ // Build a map of the different P31 (instance of) values for a specified code.
+
+ let key = WikidataEntryKey::parse(entry);
+ let what = parse_wikidata_entity_uri(&entry["what"]).expect("Can't parse P31 what result");
+
+ what_map
+ .entry(key)
+ .or_insert(Vec::new())
+ .push(what);
+ });
+
+ // Consolidate entries and insert them into the database.
+ // To avoid a async closure, we need to stop the HOF in the map.
+ let entries = what_map
+ .into_iter()
+ .map(|(key, candidates)| WikidataEntry::consolidate(key, candidates));
+
+ for entry in entries {
+ entry
+ .insert_to_db(&pool)
+ .await
+ }
+
+}
+
+/* -------------------------------------------------------------
+ Arguments parsing
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+impl ToTableInitializationArgs for &WikidataArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: String::from(WIKIDATA_TABLE),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata entry structures
+
+ WikidataEntry represents the data ready to be inserted
+ in our database.
+
+ WikidataEntryKey is a subset of WikidataEntry to identify
+ a set (FANTOIR code, Wikidata item) to be used as HashMap key
+ when a SPARQL query returns several rows for such set.
+
+ For example, here, we ask for P31 values, and if a Wikidata
+ entity offers several P31 values, we'll get one row per value.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Debug, Clone)]
+struct WikidataEntry {
+ code_fantoir: String,
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+ what: String,
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+struct WikidataEntryKey {
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+}
+
+impl WikidataEntryKey {
+ fn parse(entry: &HashMap<String, Term>) -> Self {
+ Self {
+ code_fantoir_wikidata: parse_literal(&entry["code_fantoir"]).expect("Can't parse code"),
+ item: parse_wikidata_entity_uri(&entry["item"]).expect("Can't parse item"),
+ item_label: parse_literal(&entry["itemLabel"]).expect("Can't parse item label"),
+ }
+ }
+}
+
+impl WikidataEntry {
+ fn parse(entry: &HashMap<String, Term>) -> Self {
+ let code_fantoir_wikidata = parse_literal(&entry["code_fantoir"]).expect("Can't parse code");
+ let code_fantoir = match fix_fantoir_code(&code_fantoir_wikidata) {
+ FixedFantoirCode::Computed(code) => code,
+ FixedFantoirCode::ToSearch { code_insee, identifiant_communal_voie } => {
+ todo!()
+ }
+ };
+
+ Self {
+ code_fantoir,
+ code_fantoir_wikidata,
+ item: parse_wikidata_entity_uri(&entry["item"]).expect("Can't parse item"),
+ item_label: parse_literal(&entry["itemLabel"]).expect("Can't parse item label"),
+ what: parse_wikidata_entity_uri(&entry["what"]).expect("Can't parse P31 what result"),
+ }
+ }
+
+ fn consolidate (key: WikidataEntryKey, what_candidates: Vec<String>) -> Self {
+ let what = determine_p31_winner(&what_candidates);
+
+ let code_fantoir = match fix_fantoir_code(&key.code_fantoir_wikidata) {
+ FixedFantoirCode::Computed(code) => code,
+ FixedFantoirCode::ToSearch { code_insee, identifiant_communal_voie } => {
+ todo!();
+ }
+ };
+
+ Self {
+ code_fantoir,
+ code_fantoir_wikidata: key.code_fantoir_wikidata,
+ item: key.item,
+ item_label: key.item_label,
+ what,
+ }
+ }
+
+ async fn insert_to_db (&self, pool: &PgPool) {
+ let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
+ query.push_str(
+ r#"
+ (code_fantoir, code_fantoir_wikidata, item, item_label, what)
+ VALUES
+ ($1, $2, $3, $4, $5)"#
+ );
+
+ if let Err(error) = sqlx::query(&query)
+ .bind(&self.code_fantoir)
+ .bind(&self.code_fantoir_wikidata)
+ .bind(&self.item)
+ .bind(&self.item_label)
+ .bind(&self.what)
+
+ .execute(pool)
+ .await {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", self);
+ eprintln!("{}", error);
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Parses a Wikidata entity URI from a RDF term.
+///
+/// For example, to parse a term representing Q1234:
+///
+/// ```
+/// let term = Term::NamedNode(
+/// NamedNode::new("http://www.wikidata.org/entity/Q1234").unwrap()
+/// );
+/// let entity = parse_wikidata_entity_uri(&term).unwrap();
+///
+/// assert_eq!("Q1234", &entity);
+/// ```
+pub fn parse_wikidata_entity_uri (term: &Term) -> Option<String> {
+ parse_term_uri(term)
+ .map(|uri| {
+ let pos = uri.rfind('/').expect("URI doesn't contain any /") + 1;
+
+ uri[pos..].to_string()
+ })
+}
+
+/* -------------------------------------------------------------
+ Tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+ use oxrdf::NamedNode;
+ use super::*;
+
+ #[test]
+ pub fn test_parse_wikidata_entity_uri () {
+ let node = NamedNode::new("http://www.wikidata.org/entity/Q849777").unwrap();
+ let term = Term::NamedNode(node);
+
+ assert_eq!("Q849777", &parse_wikidata_entity_uri(&term).unwrap());
+ }
+}
diff --git a/src/commands/wikidata/qualification.rs b/src/commands/wikidata/qualification.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/qualification.rs
@@ -0,0 +1,54 @@
+//! Helper for items qualification.
+//!
+//! Wikidata uses the P31 "instance of" property to qualify items,
+//! which is helpful to identify voies, especially the pseudo-voies
+//! not furthermore described in FANTOIR.
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+ static ref P31_WINNERS: Vec<&'static str> = vec![
+ // Important values
+ "Q510662", // ring road
+ "Q928830", // metro station
+ "Q18615527", // tram bridge
+
+ // Not interesting values, as FANTOIR already describes the road,
+ // but we still need to consolidate.
+ "Q2026833", // square, type jardin public
+ "Q207934", // allée
+ "Q54114", // boulevard
+ "Q79007", // street, wins against road but loses against boulevard
+
+ /*
+ Can't determine P31 winner amongst ["Q22698", "Q22746"], Q22698 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q13634881"], Q703941 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q17383262"], Q703941 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q7543083"], Q703941 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q3558430"], Q703941 is picked.
+ */
+ ];
+}
+
+/// Determine amongst a sets of items which one is the more relevant
+/// to describe a pseudo-voie.
+///
+/// This is useful when a Wikidata entity has several values for P31
+/// to decide which one is the most interesting to keep in our context.
+pub fn determine_p31_winner(candidates: &Vec<String>) -> String {
+ if candidates.len() == 1 {
+ // If there is only one, that's the one to use.
+ return candidates[0].clone();
+ }
+
+ for winner_candidate in P31_WINNERS.iter() {
+ for actual_candidate in candidates {
+ if winner_candidate == actual_candidate {
+ return actual_candidate.clone();
+ }
+ }
+ }
+
+ eprintln!("Can't determine P31 winner amongst {:?}, {} is picked.", candidates, candidates[0]);
+ candidates[0].clone()
+}
diff --git a/src/db.rs b/src/db.rs
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,11 +3,22 @@
//! This module provides helpers to interact with a PostgreSQL database.
//! Functions expect to work with an executor from sqlx crate.
+use std::future::Future;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
static QUERIES_SEPARATOR: &str = "\n\n\n";
+pub struct TableInitializationArgs {
+ pub table_name: String,
+ pub create_table: bool,
+ pub overwrite_table: bool,
+}
+
+pub trait ToTableInitializationArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs;
+}
+
pub async fn connect_to_db (database_url: &str) -> PgPool {
PgPoolOptions::new()
.max_connections(3)
@@ -61,6 +72,41 @@
.expect("Can't truncate table.");
}
+pub async fn initialize_table<F, T> (
+ pool: &PgPool,
+ callback: F,
+ args: T
+) -> Result<(), String>
+ where F: Future, T: ToTableInitializationArgs
+{
+ let args = args.to_table_initialization_args();
+ if is_table_exists(pool, &args.table_name).await {
+ if is_table_empty(&pool, &args.table_name).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.table_name).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.table_name
+ ));
+ }
+
+ if args.create_table {
+ callback.await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.table_name
+ ))
+}
+
pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
for query in queries.split(QUERIES_SEPARATOR) {
sqlx::query(&query)
diff --git a/src/fantoir.rs b/src/fantoir.rs
--- a/src/fantoir.rs
+++ b/src/fantoir.rs
@@ -3,9 +3,20 @@
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
+use lazy_static::lazy_static;
use sqlx::PgPool;
use sqlx::types::chrono::NaiveDate;
+lazy_static! {
+ static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
+
+ /// The alphabet without I O and Q.
+ static ref RIVOLI_STRING: Vec<char> = vec![
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
+ 'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
+ ];
+}
+
/// A voie in the FANTOIR database
#[derive(Debug)]
pub struct FantoirEntry {
@@ -158,6 +169,99 @@
}
}
+/// A fixed FANTOIR code result
+pub enum FixedFantoirCode {
+ /// The code has been fully computed
+ Computed(String),
+
+ /// Information needed to query the code has been extracted, but code direction is unknown
+ /// Such result can be queried through search_code_fantoir()
+ ToSearch { code_insee: String, identifiant_communal_voie: String }
+}
+
+/// Transforms FANTOIR code from BAN into regular FANTOIR codes.
+/// BAN sometimes uses <insee code>_<identifiant voie commune> without Rivoli key.
+pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode {
+ let mut code = code.to_string();
+
+ if code.contains("_") {
+ // 97231_B026 -> 972231B026
+ code = if code.starts_with("97") {
+ // Code direction = department last digit
+ format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..])
+ } else if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[6..10].to_string(),
+ }
+ } else {
+ // Code direction = 0
+ format!("{}0{}{}", &code[0..=2], &code[3..5], &code[6..])
+ };
+ }
+
+ if code.len() == 10 {
+ let last_char = code.chars().last().unwrap();
+ println!("Code: {}, ends by {}", code, last_char);
+
+ match last_char {
+ '0'..='9' => {
+ code.push(compute_rivoli_key(&code));
+ }
+
+ 'A'..='Z' => {
+ // 441090516U -> 4401090516U
+ code = if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ // 920514135A -> 92051 4135
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[5..9].to_string(),
+ }
+ //( , &code[5..9])
+ } else {
+ format!("{}0{}", &code[0..2], &code[2..])
+ };
+ }
+
+ _ => unreachable!(),
+ }
+ }
+
+ FixedFantoirCode::Computed(code)
+}
+
+pub fn uses_specific_code_direction (code: &str) -> bool {
+ DEPARTMENTS_WITH_CODE_DIRECTION
+ .iter()
+ .any(|&dpt| code.starts_with(dpt))
+}
+
+pub fn compute_rivoli_key (code: &str) -> char {
+ // See https://georezo.net/forum/viewtopic.php?id=102292
+
+ if code.starts_with("2A") || code.starts_with("2B") {
+ // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19?
+ unimplemented!()
+ }
+
+ println!();
+ println!("Compute code rivoli key for {}", code);
+
+ let part_commune: i32 = code[0..6].parse().unwrap();
+ let type_voie = code.chars().nth(6).unwrap();
+ let type_voie = if type_voie.is_alphabetic() {
+ type_voie as u32 - 55
+ } else {
+ type_voie.to_digit(10).unwrap()
+ };
+ let numero_identifiant_communal_voie: i32 = code[7..].parse().unwrap();
+
+ let index = (part_commune * 19 + type_voie as i32 * 11 + numero_identifiant_communal_voie) % 23;
+ return RIVOLI_STRING[index as usize];
+}
+
#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
@@ -166,7 +270,7 @@
#[test]
fn test_parse_fantoir_date() {
let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
- let actual = parse_fantoir_date("1987001");
+ let actual = parse_fantoir_date("1987001").unwrap();
assert_eq!(expected, actual);
}
@@ -189,4 +293,23 @@
fn test_parse_optional_string_when_only_spaces() {
assert_eq!(true, parse_optional_string(" ").is_none());
}
+
+ #[test]
+ pub fn test_fix_fantoir_code () {
+ assert_eq!("755112P144L", &fix_fantoir_code("755112P144L"));
+ assert_eq!("972231B026U", &fix_fantoir_code("97231_B026"));
+ assert_eq!("4401090516U", &fix_fantoir_code("441090516U"));
+ assert_eq!("972222B305L", &fix_fantoir_code("972222B305"));
+ }
+
+ #[test]
+ pub fn test_compute_rivoli_key() {
+ assert_eq!('W', compute_rivoli_key("380003B001"));
+ assert_eq!('U', compute_rivoli_key("972231B026"));
+ }
+
+ #[test]
+ pub fn test_compute_rivoli_key_with_type_voie_zero() {
+ assert_eq!('C', compute_rivoli_key("9722230261"));
+ }
}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,11 +2,11 @@
use clap::{Args, Parser};
-use crate::commands::import::import;
-
mod commands;
mod db;
mod fantoir;
+mod services;
+mod sparql;
#[derive(Debug, Parser)]
#[command(name = "fantoir2db")]
@@ -15,6 +15,13 @@
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
+
+ /// Query Wikidata SPARQL end-point to enrich FANTOIR information
+ Wikidata(WikidataArgs),
+
+ /// Update foreign keys to the specified fantoir table
+ #[command(arg_required_else_help = true)]
+ UpdateForeignKeys(UpdateForeignKeysArgs),
}
#[derive(Debug, Args)]
@@ -35,6 +42,24 @@
fantoir_table: String,
}
+#[derive(Debug, Args)]
+pub struct WikidataArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+}
+
+#[derive(Debug, Args)]
+pub struct UpdateForeignKeysArgs {
+ /// The name of the FANTOIR table to use
+ fantoir_table: String,
+}
+
#[tokio::main]
async fn main() {
let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
@@ -44,7 +69,13 @@
match command {
FantoirCommand::Import(args) => {
- import(&args, &database_url).await;
+ commands::import::import(&args, &database_url).await;
},
+ FantoirCommand::Wikidata(args) => {
+ commands::wikidata::import(&args, &database_url).await
+ }
+ FantoirCommand::UpdateForeignKeys(args) => {
+ commands::update_foreign_keys::update(&database_url, &args.fantoir_table).await;
+ }
};
}
diff --git a/src/queries/wikidata.sparql b/src/queries/wikidata.sparql
new file mode 100644
--- /dev/null
+++ b/src/queries/wikidata.sparql
@@ -0,0 +1,13 @@
+PREFIX bd: <http://www.bigdata.com/rdf#>
+PREFIX wikibase: <http://wikiba.se/ontology#>
+PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+
+# Streets with FANTOIR code
+SELECT DISTINCT ?code_fantoir ?item ?itemLabel ?what
+WHERE
+{
+ ?item wdt:P3182 ?code_fantoir .
+ ?item wdt:P31 ?what
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
+}
+LIMIT 3
diff --git a/src/schema/foreign_keys.sql b/src/schema/foreign_keys.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/foreign_keys.sql
@@ -0,0 +1,7 @@
+alter table fantoir_wikidata
+ drop constraint if exists fantoir_wikidata_code_fantoir_fk;
+
+
+alter table fantoir_wikidata
+ add constraint fantoir_wikidata_code_fantoir_fk
+ foreign key (code_fantoir) references /*table*/fantoir (code_fantoir);
diff --git a/src/schema/wikidata.sql b/src/schema/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/wikidata.sql
@@ -0,0 +1,29 @@
+-- This table matches Wikidata entities and FANTOIR codes.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir_wikidata
+(
+ -- Identifiers
+ code_fantoir char(11) NOT NULL
+ constraint /*index*/index_fantoir_wikidata_pk
+ primary key,
+ code_fantoir_wikidata char(11) NOT NULL,
+
+ -- Wikidata information
+ item varchar(12) NOT NULL,
+ item_label text,
+ what varchar(12) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir_wikidata)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_wikidata_voie_trigram
+ ON /*table*/fantoir_wikidata
+ USING gin (item_label gin_trgm_ops);
diff --git a/src/services/http_client.rs b/src/services/http_client.rs
new file mode 100644
--- /dev/null
+++ b/src/services/http_client.rs
@@ -0,0 +1,12 @@
+use lazy_static::lazy_static;
+
+lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+pub fn get_user_agent () -> &'static str {
+ &USER_AGENT
+}
diff --git a/src/services/mod.rs b/src/services/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/services/mod.rs
@@ -0,0 +1 @@
+pub(crate) mod http_client;
diff --git a/src/sparql.rs b/src/sparql.rs
new file mode 100644
--- /dev/null
+++ b/src/sparql.rs
@@ -0,0 +1,120 @@
+//! # SPARQL client
+
+use std::collections::HashMap;
+use std::env;
+use oxrdf::Term;
+
+use reqwest::{ClientBuilder, Url};
+use reqwest::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
+
+static DEFAULT_USER_AGENT: &str = concat!(
+ env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),
+);
+
+pub struct Client {
+ pub endpoint: String,
+ client: HttpClient,
+}
+
+impl Client {
+ pub fn new (endpoint: &str, user_agent: &str) -> Self {
+ let client = ClientBuilder::new()
+ .user_agent(user_agent)
+ .default_headers( {
+ let mut headers = HeaderMap::new();
+ headers.insert("Accept", HeaderValue::from_static("Accept: application/sparql-results+json"));
+ headers
+ })
+ .gzip(true)
+ .deflate(true)
+ .build()
+ .expect("Can't build HTTP client");
+
+ Self {
+ endpoint: String::from(endpoint),
+ client,
+ }
+ }
+
+ pub fn with_default_user_agent(endpoint: &str) -> Self {
+ let user_agent = Self::determine_user_agent();
+
+ Self::new(endpoint, &user_agent)
+ }
+
+ fn determine_user_agent () -> String {
+ env::current_exe()
+ .ok()
+ .and_then(|path| path.file_name().map(|s| s.to_os_string()))
+ .and_then(|program_name| program_name.into_string().ok())
+ .unwrap_or(String::from(DEFAULT_USER_AGENT))
+ }
+
+ pub async fn query (&self, query: &str) -> Vec<HashMap<String, Term>> {
+ let result = include_str!("tmp/wikidata-query-result.xml").to_string();
+
+ // let url = Url::parse_with_params(&self.endpoint, &[("query", query)])
+ // .expect("Can't parse endpoint as absolute URL.");
+ //
+ // let result = reqwest::get(url)
+ // .await
+ // .expect("Can't query endpoint")
+ // .text()
+ // .await
+ // .expect("End-point didn't return a reply.");
+
+ let mut entries = Vec::new();
+
+ if let QueryResultsReader::Solutions(solutions) = QueryResultsParser
+ ::from_format(QueryResultsFormat::Xml)
+ .read_results(result.as_bytes())
+ .expect("Can't read Wikidata reply")
+ {
+ for solution in solutions {
+ let entry: HashMap<_, _> = solution
+ .expect("Can't read solution")
+ .iter()
+ .map(|(variable, term)| (
+ variable.as_str().to_string(),
+ term.clone(),
+ ))
+ .collect();
+ entries.push(entry);
+ }
+ } else {
+ panic!("Can't parse SPARQL result as a solution.");
+ }
+
+ entries
+ }
+}
+
+pub fn parse_term_uri (term: &Term) -> Option<String> {
+ if let Term::NamedNode(node) = term {
+ Some(node.as_str().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn parse_literal (term: &Term) -> Option<String> {
+ if let Term::Literal(literal) = term {
+ Some(literal.value().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn is_term_empty(term: &Term) -> bool {
+ match term {
+ Term::NamedNode(node) => {
+ // Special values IRI are considered as empty values.
+ node.as_str().contains("/.well-known/genid/")
+ }
+ Term::BlankNode(_) => true,
+ Term::Literal(_) => false,
+ _ => unimplemented!(),
+ }
+}

File Metadata

Mime Type
text/plain
Expires
Sun, Jan 19, 12:43 (13 h, 30 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2361036
Default Alt Text
D2731.id6922.diff (28 KB)

Event Timeline