Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3917402
D2731.id6931.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
29 KB
Referenced Files
None
Subscribers
None
D2731.id6931.diff
View Options
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,9 @@
license = "BSD-2-Clause"
[dependencies]
+lazy_static = "~1.4.0"
+oxrdf = "~0.1.1"
+sparesults = "~0.1.3"
[dependencies.async-scoped]
version = "~0.7.1"
@@ -18,6 +21,10 @@
version = "~4.0.32"
features = ["derive"]
+[dependencies.reqwest]
+version = "~0.11.13"
+features = ["gzip", "deflate"]
+
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/src/commands/import.rs b/src/commands/import.rs
--- a/src/commands/import.rs
+++ b/src/commands/import.rs
@@ -12,12 +12,33 @@
use crate::db::*;
use crate::fantoir::FantoirEntry;
+impl ToTableInitializationArgs for &ImportArgs {
+ fn to_table_initialization_args (&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: self.fantoir_table.clone(),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
+
pub async fn import(args: &ImportArgs, database_url: &str) {
let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
let pool = connect_to_db(database_url).await;
// Create/truncate table as needed and as allowed by options
- if let Err(error) = initialize_table(args, &pool).await {
+ let callback = async {
+ create_table(&pool, &args.fantoir_table).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
eprintln!("{}", &error);
exit(1);
}
@@ -46,39 +67,3 @@
.await
}
}
-
-async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
- if is_table_exists(pool, &args.fantoir_table).await {
- if is_table_empty(&pool, &args.fantoir_table).await {
- return Ok(());
- }
-
- if args.overwrite_table {
- truncate_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- return Err(format!(
- "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
- &args.fantoir_table
- ));
- }
-
- if args.create_table {
- create_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- Err(format!(
- "Table {} doesn't exist. To create it, run the import tool with -c option.",
- &args.fantoir_table
- ))
-}
-
-async fn create_table(pool: &PgPool, table: &str) {
- let queries = include_str!("../schema/fantoir.sql")
- .replace("/*table*/fantoir", table)
- .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
-
- run_multiple_queries(pool, &queries).await;
-}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -2,3 +2,4 @@
pub(crate) mod import;
pub(crate) mod promote;
+pub(crate) mod wikidata;
diff --git a/src/commands/promote/mod.rs b/src/commands/promote/mod.rs
--- a/src/commands/promote/mod.rs
+++ b/src/commands/promote/mod.rs
@@ -1,23 +1,27 @@
//! Command to promote a table as the one to use.
use sqlx::PgPool;
-use crate::db::{connect_to_db, run_multiple_queries_groups};
+use crate::db::*;
/// Promotes a FANTOIR table as the relevant version to use
pub async fn promote (fantoir_table: &str, database_url: &str) {
let pool = connect_to_db(database_url).await;
- let queries_groups = get_queries_groups(&pool, fantoir_table);
+ let queries_groups = get_queries_groups(&pool, fantoir_table).await;
run_multiple_queries_groups(&pool, &queries_groups);
}
/// Determines the groups of queries needed for promotion
-fn get_queries_groups (pool: &PgPool, fantoir_table: &str) -> Vec<String> {
+async fn get_queries_groups (pool: &PgPool, fantoir_table: &str) -> Vec<String> {
let mut queries_groups = vec![
include_str!("../../schema/promote/config.sql"),
include_str!("../../schema/promote/fantoir_view.sql"),
];
+ if is_table_exists(pool, "fantoir_wikidata").await {
+ queries_groups.push(include_str!("../../schema/promote/wikidata.sql"));
+ }
+
queries_groups
.into_iter()
.map(|queries| queries
diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/mod.rs
@@ -0,0 +1,229 @@
+//! Query Wikidata SPARQL end-point and import result into PostgreSQL
+
+mod qualification;
+
+use std::collections::HashMap;
+use std::process::exit;
+use oxrdf::Term;
+use sqlx::{Execute, PgPool};
+
+use crate::db::*;
+use crate::{sparql, WikidataArgs};
+use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
+use crate::services::http_client::get_user_agent;
+use crate::sparql::{is_term_empty, parse_literal, parse_term_uri};
+
+static WIKIDATA_TABLE: &str = "fantoir_wikidata";
+
+/* -------------------------------------------------------------
+ Import task
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub async fn import (args: &WikidataArgs, database_url: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ let callback = async {
+ let queries = include_str!("../../schema/wikidata.sql");
+ run_multiple_queries(&pool, &queries).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Query Wikidata
+ let client = sparql::Client::new("https://query.wikidata.org/sparql", get_user_agent());
+
+ let mut what_map = HashMap::new();
+ client.query(include_str!("../../queries/wikidata.sparql"))
+ .await
+ .iter()
+ .filter(|entry| !is_term_empty(&entry["code_fantoir"]))
+ .for_each(|entry| {
+ // Build a map of the different P31 (instance of) values for a specified code.
+
+ let key = WikidataEntryKey::parse(entry);
+ let what = parse_wikidata_entity_uri(&entry["what"]).expect("Can't parse P31 what result");
+
+ what_map
+ .entry(key)
+ .or_insert(Vec::new())
+ .push(what);
+ });
+
+ // Consolidate entries and insert them into the database.
+ // To avoid a async closure, we need to stop the HOF in the map.
+ let entries = what_map
+ .into_iter()
+ .map(|(key, candidates)| WikidataEntry::consolidate(key, candidates));
+
+ for entry in entries {
+ entry
+ .insert_to_db(&pool)
+ .await
+ }
+
+}
+
+/* -------------------------------------------------------------
+ Arguments parsing
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+impl ToTableInitializationArgs for &WikidataArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: String::from(WIKIDATA_TABLE),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata entry structures
+
+ WikidataEntry represents the data ready to be inserted
+ in our database.
+
+ WikidataEntryKey is a subset of WikidataEntry to identify
+ a set (FANTOIR code, Wikidata item) to be used as HashMap key
+ when a SPARQL query returns several rows for such set.
+
+ For example, here, we ask for P31 values, and if a Wikidata
+ entity offers several P31 values, we'll get one row per value.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Debug, Clone)]
+struct WikidataEntry {
+ code_fantoir: String,
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+ what: String,
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+struct WikidataEntryKey {
+ code_fantoir_wikidata: String,
+ item: String,
+ item_label: String,
+}
+
+impl WikidataEntryKey {
+ fn parse(entry: &HashMap<String, Term>) -> Self {
+ Self {
+ code_fantoir_wikidata: parse_literal(&entry["code_fantoir"]).expect("Can't parse code"),
+ item: parse_wikidata_entity_uri(&entry["item"]).expect("Can't parse item"),
+ item_label: parse_literal(&entry["itemLabel"]).expect("Can't parse item label"),
+ }
+ }
+}
+
+impl WikidataEntry {
+ fn parse(entry: &HashMap<String, Term>) -> Self {
+ let code_fantoir_wikidata = parse_literal(&entry["code_fantoir"]).expect("Can't parse code");
+ let code_fantoir = match fix_fantoir_code(&code_fantoir_wikidata) {
+ FixedFantoirCode::Computed(code) => code,
+ FixedFantoirCode::ToSearch { code_insee, identifiant_communal_voie } => {
+ todo!()
+ }
+ };
+
+ Self {
+ code_fantoir,
+ code_fantoir_wikidata,
+ item: parse_wikidata_entity_uri(&entry["item"]).expect("Can't parse item"),
+ item_label: parse_literal(&entry["itemLabel"]).expect("Can't parse item label"),
+ what: parse_wikidata_entity_uri(&entry["what"]).expect("Can't parse P31 what result"),
+ }
+ }
+
+ fn consolidate (key: WikidataEntryKey, what_candidates: Vec<String>) -> Self {
+ let what = determine_p31_winner(&what_candidates);
+
+ let code_fantoir = match fix_fantoir_code(&key.code_fantoir_wikidata) {
+ FixedFantoirCode::Computed(code) => code,
+ FixedFantoirCode::ToSearch { code_insee, identifiant_communal_voie } => {
+ todo!();
+ }
+ };
+
+ Self {
+ code_fantoir,
+ code_fantoir_wikidata: key.code_fantoir_wikidata,
+ item: key.item,
+ item_label: key.item_label,
+ what,
+ }
+ }
+
+ async fn insert_to_db (&self, pool: &PgPool) {
+ let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
+ query.push_str(
+ r#"
+ (code_fantoir, code_fantoir_wikidata, item, item_label, what)
+ VALUES
+ ($1, $2, $3, $4, $5)"#
+ );
+
+ if let Err(error) = sqlx::query(&query)
+ .bind(&self.code_fantoir)
+ .bind(&self.code_fantoir_wikidata)
+ .bind(&self.item)
+ .bind(&self.item_label)
+ .bind(&self.what)
+
+ .execute(pool)
+ .await {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", self);
+ eprintln!("{}", error);
+ }
+ }
+}
+
+/* -------------------------------------------------------------
+ Wikidata helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Parses a Wikidata entity URI from a RDF term.
+///
+/// For example, to parse a term representing Q1234:
+///
+/// ```
+/// let term = Term::NamedNode(
+/// NamedNode::new("http://www.wikidata.org/entity/Q1234").unwrap()
+/// );
+/// let entity = parse_wikidata_entity_uri(&term).unwrap();
+///
+/// assert_eq!("Q1234", &entity);
+/// ```
+pub fn parse_wikidata_entity_uri (term: &Term) -> Option<String> {
+ parse_term_uri(term)
+ .map(|uri| {
+ let pos = uri.rfind('/').expect("URI doesn't contain any /") + 1;
+
+ uri[pos..].to_string()
+ })
+}
+
+/* -------------------------------------------------------------
+ Tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+ use oxrdf::NamedNode;
+ use super::*;
+
+ #[test]
+ pub fn test_parse_wikidata_entity_uri () {
+ let node = NamedNode::new("http://www.wikidata.org/entity/Q849777").unwrap();
+ let term = Term::NamedNode(node);
+
+ assert_eq!("Q849777", &parse_wikidata_entity_uri(&term).unwrap());
+ }
+}
diff --git a/src/commands/wikidata/qualification.rs b/src/commands/wikidata/qualification.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/qualification.rs
@@ -0,0 +1,54 @@
+//! Helper for items qualification.
+//!
+//! Wikidata uses the P31 "instance of" property to qualify items,
+//! which is helpful to identify voies, especially the pseudo-voies
+//! not furthermore described in FANTOIR.
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+ static ref P31_WINNERS: Vec<&'static str> = vec![
+ // Important values
+ "Q510662", // ring road
+ "Q928830", // metro station
+ "Q18615527", // tram bridge
+
+ // Not interesting values, as FANTOIR already describes the road,
+ // but we still need to consolidate.
+ "Q2026833", // square, type jardin public
+ "Q207934", // allée
+ "Q54114", // boulevard
+ "Q79007", // street, wins against road but loses against boulevard
+
+ /*
+ Can't determine P31 winner amongst ["Q22698", "Q22746"], Q22698 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q13634881"], Q703941 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q17383262"], Q703941 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q7543083"], Q703941 is picked.
+ Can't determine P31 winner amongst ["Q703941", "Q3558430"], Q703941 is picked.
+ */
+ ];
+}
+
+/// Determine amongst a sets of items which one is the more relevant
+/// to describe a pseudo-voie.
+///
+/// This is useful when a Wikidata entity has several values for P31
+/// to decide which one is the most interesting to keep in our context.
+pub fn determine_p31_winner(candidates: &Vec<String>) -> String {
+ if candidates.len() == 1 {
+ // If there is only one, that's the one to use.
+ return candidates[0].clone();
+ }
+
+ for winner_candidate in P31_WINNERS.iter() {
+ for actual_candidate in candidates {
+ if winner_candidate == actual_candidate {
+ return actual_candidate.clone();
+ }
+ }
+ }
+
+ eprintln!("Can't determine P31 winner amongst {:?}, {} is picked.", candidates, candidates[0]);
+ candidates[0].clone()
+}
diff --git a/src/db.rs b/src/db.rs
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,12 +3,24 @@
//! This module provides helpers to interact with a PostgreSQL database.
//! Functions expect to work with an executor from sqlx crate.
+use std::future::Future;
+
use async_scoped::TokioScope;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
static QUERIES_SEPARATOR: &str = "\n\n\n";
+pub struct TableInitializationArgs {
+ pub table_name: String,
+ pub create_table: bool,
+ pub overwrite_table: bool,
+}
+
+pub trait ToTableInitializationArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs;
+}
+
pub async fn connect_to_db (database_url: &str) -> PgPool {
PgPoolOptions::new()
.max_connections(3)
@@ -62,6 +74,41 @@
.expect("Can't truncate table.");
}
+pub async fn initialize_table<F, T> (
+ pool: &PgPool,
+ callback: F,
+ args: T
+) -> Result<(), String>
+ where F: Future, T: ToTableInitializationArgs
+{
+ let args = args.to_table_initialization_args();
+ if is_table_exists(pool, &args.table_name).await {
+ if is_table_empty(&pool, &args.table_name).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.table_name).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.table_name
+ ));
+ }
+
+ if args.create_table {
+ callback.await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.table_name
+ ))
+}
+
pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
for query in queries.split(QUERIES_SEPARATOR) {
sqlx::query(&query)
diff --git a/src/fantoir.rs b/src/fantoir.rs
--- a/src/fantoir.rs
+++ b/src/fantoir.rs
@@ -3,9 +3,20 @@
//! This module offers a structure for a FANTOIR record, methods to parse the file and export it.
//! Database functions expect to work with an executor from sqlx crate.
+use lazy_static::lazy_static;
use sqlx::PgPool;
use sqlx::types::chrono::NaiveDate;
+lazy_static! {
+ static ref DEPARTMENTS_WITH_CODE_DIRECTION: Vec<&'static str> = vec!["13", "59", "75", "92", "97"];
+
+ /// The alphabet without I O and Q.
+ static ref RIVOLI_STRING: Vec<char> = vec![
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
+ 'N', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
+ ];
+}
+
/// A voie in the FANTOIR database
#[derive(Debug)]
pub struct FantoirEntry {
@@ -158,6 +169,99 @@
}
}
+/// A fixed FANTOIR code result
+pub enum FixedFantoirCode {
+ /// The code has been fully computed
+ Computed(String),
+
+ /// Information needed to query the code has been extracted, but code direction is unknown
+ /// Such result can be queried through search_code_fantoir()
+ ToSearch { code_insee: String, identifiant_communal_voie: String }
+}
+
+/// Transforms FANTOIR code from BAN into regular FANTOIR codes.
+/// BAN sometimes uses <insee code>_<identifiant voie commune> without Rivoli key.
+pub fn fix_fantoir_code(code: &str) -> FixedFantoirCode {
+ let mut code = code.to_string();
+
+ if code.contains("_") {
+ // 97231_B026 -> 972231B026
+ code = if code.starts_with("97") {
+ // Code direction = department last digit
+ format!("{}{}{}", &code[0..=2], &code[2..5], &code[6..])
+ } else if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[6..10].to_string(),
+ }
+ } else {
+ // Code direction = 0
+ format!("{}0{}{}", &code[0..=2], &code[3..5], &code[6..])
+ };
+ }
+
+ if code.len() == 10 {
+ let last_char = code.chars().last().unwrap();
+ println!("Code: {}, ends by {}", code, last_char);
+
+ match last_char {
+ '0'..='9' => {
+ code.push(compute_rivoli_key(&code));
+ }
+
+ 'A'..='Z' => {
+ // 441090516U -> 4401090516U
+ code = if uses_specific_code_direction(&code) {
+ // We can't fix it by computation, we need to search it in the database
+ // 920514135A -> 92051 4135
+ return FixedFantoirCode::ToSearch {
+ code_insee: code[0..5].to_string(),
+ identifiant_communal_voie: code[5..9].to_string(),
+ }
+ //( , &code[5..9])
+ } else {
+ format!("{}0{}", &code[0..2], &code[2..])
+ };
+ }
+
+ _ => unreachable!(),
+ }
+ }
+
+ FixedFantoirCode::Computed(code)
+}
+
+pub fn uses_specific_code_direction (code: &str) -> bool {
+ DEPARTMENTS_WITH_CODE_DIRECTION
+ .iter()
+ .any(|&dpt| code.starts_with(dpt))
+}
+
+pub fn compute_rivoli_key (code: &str) -> char {
+ // See https://georezo.net/forum/viewtopic.php?id=102292
+
+ if code.starts_with("2A") || code.starts_with("2B") {
+ // 2A would be 2 10 and 2B would be 2 11, but how to build a number to multiply by 19?
+ unimplemented!()
+ }
+
+ println!();
+ println!("Compute code rivoli key for {}", code);
+
+ let part_commune: i32 = code[0..6].parse().unwrap();
+ let type_voie = code.chars().nth(6).unwrap();
+ let type_voie = if type_voie.is_alphabetic() {
+ type_voie as u32 - 55
+ } else {
+ type_voie.to_digit(10).unwrap()
+ };
+ let numero_identifiant_communal_voie: i32 = code[7..].parse().unwrap();
+
+ let index = (part_commune * 19 + type_voie as i32 * 11 + numero_identifiant_communal_voie) % 23;
+ return RIVOLI_STRING[index as usize];
+}
+
#[cfg(test)]
mod tests {
// Note this useful idiom: importing names from outer (for mod tests) scope.
@@ -166,7 +270,7 @@
#[test]
fn test_parse_fantoir_date() {
let expected = NaiveDate::from_ymd_opt(1987, 1, 1).unwrap();
- let actual = parse_fantoir_date("1987001");
+ let actual = parse_fantoir_date("1987001").unwrap();
assert_eq!(expected, actual);
}
@@ -189,4 +293,23 @@
fn test_parse_optional_string_when_only_spaces() {
assert_eq!(true, parse_optional_string(" ").is_none());
}
+
+ #[test]
+ pub fn test_fix_fantoir_code () {
+ assert_eq!("755112P144L", &fix_fantoir_code("755112P144L"));
+ assert_eq!("972231B026U", &fix_fantoir_code("97231_B026"));
+ assert_eq!("4401090516U", &fix_fantoir_code("441090516U"));
+ assert_eq!("972222B305L", &fix_fantoir_code("972222B305"));
+ }
+
+ #[test]
+ pub fn test_compute_rivoli_key() {
+ assert_eq!('W', compute_rivoli_key("380003B001"));
+ assert_eq!('U', compute_rivoli_key("972231B026"));
+ }
+
+ #[test]
+ pub fn test_compute_rivoli_key_with_type_voie_zero() {
+ assert_eq!('C', compute_rivoli_key("9722230261"));
+ }
}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,12 +2,13 @@
use clap::{Args, Parser};
-use crate::commands::import::import;
use crate::commands::promote::promote;
mod commands;
mod db;
mod fantoir;
+mod services;
+mod sparql;
#[derive(Debug, Parser)]
#[command(name = "fantoir-datasource")]
@@ -20,6 +21,9 @@
/// Promote an imported FANTOIR table as the current FANTOIR table to use
#[command(arg_required_else_help = true)]
Promote(PromoteArgs),
+
+ /// Query Wikidata SPARQL end-point to enrich FANTOIR information
+ Wikidata(WikidataArgs),
}
#[derive(Debug, Args)]
@@ -46,6 +50,18 @@
fantoir_table: String,
}
+#[derive(Debug, Args)]
+pub struct WikidataArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+}
+
#[tokio::main]
async fn main() {
let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
@@ -55,10 +71,13 @@
match command {
FantoirCommand::Import(args) => {
- import(&args, &database_url).await;
+ commands::import::import(&args, &database_url).await;
},
FantoirCommand::Promote(args) => {
promote(&args.fantoir_table, &database_url).await;
},
+ FantoirCommand::Wikidata(args) => {
+ commands::wikidata::import(&args, &database_url).await
+ },
};
}
diff --git a/src/queries/wikidata.sparql b/src/queries/wikidata.sparql
new file mode 100644
--- /dev/null
+++ b/src/queries/wikidata.sparql
@@ -0,0 +1,12 @@
+PREFIX bd: <http://www.bigdata.com/rdf#>
+PREFIX wikibase: <http://wikiba.se/ontology#>
+PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+
+# Streets with FANTOIR code
+SELECT DISTINCT ?code_fantoir ?item ?itemLabel ?what
+WHERE
+{
+ ?item wdt:P3182 ?code_fantoir .
+ ?item wdt:P31 ?what
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
+}
diff --git a/src/schema/promote/wikidata.sql b/src/schema/promote/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/promote/wikidata.sql
@@ -0,0 +1,7 @@
+alter table fantoir_wikidata
+ drop constraint if exists fantoir_wikidata_code_fantoir_fk;
+
+
+alter table fantoir_wikidata
+ add constraint fantoir_wikidata_code_fantoir_fk
+ foreign key (code_fantoir) references /*table*/fantoir (code_fantoir);
diff --git a/src/schema/wikidata.sql b/src/schema/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/wikidata.sql
@@ -0,0 +1,29 @@
+-- This table matches Wikidata entities and FANTOIR codes.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir_wikidata
+(
+ -- Identifiers
+ code_fantoir char(11) NOT NULL
+ constraint /*index*/index_fantoir_wikidata_pk
+ primary key,
+ code_fantoir_wikidata char(11) NOT NULL,
+
+ -- Wikidata information
+ item varchar(12) NOT NULL,
+ item_label text,
+ what varchar(12) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir_wikidata)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_wikidata_voie_trigram
+ ON /*table*/fantoir_wikidata
+ USING gin (item_label gin_trgm_ops);
diff --git a/src/services/http_client.rs b/src/services/http_client.rs
new file mode 100644
--- /dev/null
+++ b/src/services/http_client.rs
@@ -0,0 +1,12 @@
+use lazy_static::lazy_static;
+
+lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+pub fn get_user_agent () -> &'static str {
+ &USER_AGENT
+}
diff --git a/src/services/mod.rs b/src/services/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/services/mod.rs
@@ -0,0 +1 @@
+pub(crate) mod http_client;
diff --git a/src/sparql.rs b/src/sparql.rs
new file mode 100644
--- /dev/null
+++ b/src/sparql.rs
@@ -0,0 +1,120 @@
+//! # SPARQL client
+
+use std::collections::HashMap;
+use std::env;
+use oxrdf::Term;
+
+use reqwest::{ClientBuilder, Url};
+use reqwest::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
+
+static DEFAULT_USER_AGENT: &str = concat!(
+ env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),
+);
+
+pub struct Client {
+ pub endpoint: String,
+ client: HttpClient,
+}
+
+impl Client {
+ pub fn new (endpoint: &str, user_agent: &str) -> Self {
+ let client = ClientBuilder::new()
+ .user_agent(user_agent)
+ .default_headers( {
+ let mut headers = HeaderMap::new();
+ headers.insert("Accept", HeaderValue::from_static("Accept: application/sparql-results+json"));
+ headers
+ })
+ .gzip(true)
+ .deflate(true)
+ .build()
+ .expect("Can't build HTTP client");
+
+ Self {
+ endpoint: String::from(endpoint),
+ client,
+ }
+ }
+
+ pub fn with_default_user_agent(endpoint: &str) -> Self {
+ let user_agent = Self::determine_user_agent();
+
+ Self::new(endpoint, &user_agent)
+ }
+
+ fn determine_user_agent () -> String {
+ env::current_exe()
+ .ok()
+ .and_then(|path| path.file_name().map(|s| s.to_os_string()))
+ .and_then(|program_name| program_name.into_string().ok())
+ .unwrap_or(String::from(DEFAULT_USER_AGENT))
+ }
+
+ pub async fn query (&self, query: &str) -> Vec<HashMap<String, Term>> {
+ let result = include_str!("tmp/wikidata-query-result.xml").to_string();
+
+ // let url = Url::parse_with_params(&self.endpoint, &[("query", query)])
+ // .expect("Can't parse endpoint as absolute URL.");
+ //
+ // let result = reqwest::get(url)
+ // .await
+ // .expect("Can't query endpoint")
+ // .text()
+ // .await
+ // .expect("End-point didn't return a reply.");
+
+ let mut entries = Vec::new();
+
+ if let QueryResultsReader::Solutions(solutions) = QueryResultsParser
+ ::from_format(QueryResultsFormat::Xml)
+ .read_results(result.as_bytes())
+ .expect("Can't read Wikidata reply")
+ {
+ for solution in solutions {
+ let entry: HashMap<_, _> = solution
+ .expect("Can't read solution")
+ .iter()
+ .map(|(variable, term)| (
+ variable.as_str().to_string(),
+ term.clone(),
+ ))
+ .collect();
+ entries.push(entry);
+ }
+ } else {
+ panic!("Can't parse SPARQL result as a solution.");
+ }
+
+ entries
+ }
+}
+
+pub fn parse_term_uri (term: &Term) -> Option<String> {
+ if let Term::NamedNode(node) = term {
+ Some(node.as_str().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn parse_literal (term: &Term) -> Option<String> {
+ if let Term::Literal(literal) = term {
+ Some(literal.value().to_string())
+ } else {
+ None
+ }
+}
+
+pub fn is_term_empty(term: &Term) -> bool {
+ match term {
+ Term::NamedNode(node) => {
+ // Special values IRI are considered as empty values.
+ node.as_str().contains("/.well-known/genid/")
+ }
+ Term::BlankNode(_) => true,
+ Term::Literal(_) => false,
+ _ => unimplemented!(),
+ }
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Dec 21, 03:58 (13 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2307611
Default Alt Text
D2731.id6931.diff (29 KB)
Attached To
Mode
D2731: Query Wikidata to enrich FANTOIR file
Attached
Detach File
Event Timeline
Log In to Comment