Page MenuHomeDevCentral

D2731.id6921.diff
No OneTemporary

D2731.id6921.diff

diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,10 @@
version = "~4.0.32"
features = ["derive"]
+[dependencies.reqwest]
+version = "~0.11.13"
+features = ["gzip", "deflate"]
+
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/src/commands/import.rs b/src/commands/import.rs
--- a/src/commands/import.rs
+++ b/src/commands/import.rs
@@ -12,12 +12,33 @@
use crate::db::*;
use crate::fantoir::FantoirEntry;
+impl ToTableInitializationArgs for &ImportArgs {
+    /// Maps the import command options to the generic table initialization
+    /// options: the target table is the one named by `fantoir_table`, and the
+    /// create/overwrite flags are passed through unchanged.
+    fn to_table_initialization_args (&self) -> TableInitializationArgs {
+        // The name is cloned as the returned struct owns its table name.
+        let table_name = self.fantoir_table.clone();
+        let (create_table, overwrite_table) = (self.create_table, self.overwrite_table);
+
+        TableInitializationArgs { table_name, create_table, overwrite_table }
+    }
+}
+
+/// Runs the queries from the embedded `fantoir.sql` schema against `pool`,
+/// after substituting the table and index name placeholders with names
+/// derived from `table`.
+///
+/// NOTE(review): `table` is interpolated as-is into the SQL text; it is
+/// expected to come from the command line, not from untrusted input.
+async fn create_table(pool: &PgPool, table: &str) {
+    let index_prefix = format!("index_{}_", table);
+    let schema = include_str!("../schema/fantoir.sql");
+
+    let queries = schema
+        .replace("/*table*/fantoir", table)
+        .replace("/*index*/index_fantoir_", &index_prefix);
+
+    run_multiple_queries(pool, &queries).await;
+}
+
pub async fn import(args: &ImportArgs, database_url: &str) {
let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
let pool = connect_to_db(database_url).await;
// Create/truncate table as needed and as allowed by options
- if let Err(error) = initialize_table(args, &pool).await {
+ let callback = async {
+ create_table(&pool, &args.fantoir_table).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
eprintln!("{}", &error);
exit(1);
}
@@ -46,39 +67,3 @@
.await
}
}
-
-async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
- if is_table_exists(pool, &args.fantoir_table).await {
- if is_table_empty(&pool, &args.fantoir_table).await {
- return Ok(());
- }
-
- if args.overwrite_table {
- truncate_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- return Err(format!(
- "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
- &args.fantoir_table
- ));
- }
-
- if args.create_table {
- create_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- Err(format!(
- "Table {} doesn't exist. To create it, run the import tool with -c option.",
- &args.fantoir_table
- ))
-}
-
-async fn create_table(pool: &PgPool, table: &str) {
- let queries = include_str!("../schema/fantoir.sql")
- .replace("/*table*/fantoir", table)
- .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
-
- run_multiple_queries(pool, &queries).await;
-}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -1,3 +1,5 @@
//! Commands for the fantoir2db tool.
pub(crate) mod import;
+pub(crate) mod wikidata;
+pub(crate) mod update_foreign_keys;
diff --git a/src/commands/update_foreign_keys.rs b/src/commands/update_foreign_keys.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/update_foreign_keys.rs
@@ -0,0 +1,12 @@
+//! Update foreign keys relations between tables
+
+use crate::db::*;
+
+/// Runs the embedded `foreign_keys.sql` queries against the database at
+/// `database_url`, with the table placeholder replaced by `fantoir_table`.
+pub async fn update (database_url: &str, fantoir_table: &str) {
+    let pool = connect_to_db(database_url).await;
+
+    let template = include_str!("../schema/foreign_keys.sql");
+    let queries = template.replace("/*table*/fantoir", fantoir_table);
+
+    run_multiple_queries(&pool, &queries).await;
+}
diff --git a/src/commands/wikidata.rs b/src/commands/wikidata.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata.rs
@@ -0,0 +1,43 @@
+//! Query Wikidata SPARQL end-point and import result into PostgreSQL
+
+use std::process::exit;
+
+use crate::db::*;
+use crate::{sparql, WikidataArgs};
+
+static WIKIDATA_TABLE: &str = "fantoir_wikidata";
+
+impl ToTableInitializationArgs for &WikidataArgs {
+    /// Maps the wikidata command options to the generic table initialization
+    /// options. The table name is always `WIKIDATA_TABLE`; the create and
+    /// overwrite flags are passed through unchanged.
+    fn to_table_initialization_args(&self) -> TableInitializationArgs {
+        TableInitializationArgs {
+            table_name: String::from(WIKIDATA_TABLE),
+            create_table: self.create_table,
+            overwrite_table: self.overwrite_table,
+        }
+    }
+}
+
+/// Prepares the Wikidata table, then sends the embedded SPARQL query to the
+/// Wikidata query service.
+///
+/// The process exits with code 1, after printing the error to stderr, when
+/// the table can't be initialized with the given options.
+pub async fn import (args: &WikidataArgs, database_url: &str) {
+    let pool = connect_to_db(database_url).await;
+
+    // Create/truncate table as needed and as allowed by options.
+    // The schema only runs if initialize_table awaits this future.
+    let create_wikidata_table = async {
+        let schema = include_str!("../schema/wikidata.sql");
+        run_multiple_queries(&pool, schema).await;
+    };
+    match initialize_table(&pool, create_wikidata_table, args).await {
+        Ok(()) => {}
+        Err(message) => {
+            eprintln!("{}", &message);
+            exit(1);
+        }
+    }
+
+    // Query Wikidata
+    let user_agent = format!(
+        "{}/{} (https://databases.nasqueron.org/)",
+        env!("CARGO_PKG_NAME"),
+        env!("CARGO_PKG_VERSION"),
+    );
+    println!("User-Agent: {}", user_agent);
+
+    let sparql_query = include_str!("../queries/wikidata.sparql");
+    let client = sparql::Client::new("https://query.wikidata.org/sparql", &user_agent);
+    let entries = client.query(sparql_query).await;
+}
diff --git a/src/db.rs b/src/db.rs
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,11 +3,22 @@
//! This module provides helpers to interact with a PostgreSQL database.
//! Functions expect to work with an executor from sqlx crate.
+use std::future::Future;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
static QUERIES_SEPARATOR: &str = "\n\n\n";
+/// Options read by `initialize_table` to decide how to prepare a table.
+pub struct TableInitializationArgs {
+ /// Name of the table to check, truncate or create.
+ pub table_name: String,
+ /// When true, the creation callback runs if the table doesn't exist;
+ /// when false, a missing table is reported as an error.
+ pub create_table: bool,
+ /// When true, an existing table containing rows is truncated;
+ /// when false, such a table is reported as an error.
+ pub overwrite_table: bool,
+}
+
+/// Conversion of command-specific arguments into `TableInitializationArgs`.
+///
+/// `initialize_table` accepts any value implementing this trait.
+pub trait ToTableInitializationArgs {
+ /// Builds the table initialization options from `self`.
+ fn to_table_initialization_args(&self) -> TableInitializationArgs;
+}
+
pub async fn connect_to_db (database_url: &str) -> PgPool {
PgPoolOptions::new()
.max_connections(3)
@@ -61,6 +72,41 @@
.expect("Can't truncate table.");
}
+/// Ensures the table described by `args` is ready to receive an import.
+///
+/// - Missing table: `callback` is awaited to create it when `create_table`
+///   is set, otherwise an error is returned.
+/// - Existing empty table: nothing to do.
+/// - Existing table with rows: truncated when `overwrite_table` is set,
+///   otherwise an error is returned.
+///
+/// `callback` is only awaited in the creation case; its output is discarded.
+///
+/// # Errors
+///
+/// Returns a user-facing message naming the option to pass when the table
+/// can't be prepared with the current options.
+pub async fn initialize_table<F, T> (
+    pool: &PgPool,
+    callback: F,
+    args: T
+) -> Result<(), String>
+    where F: Future, T: ToTableInitializationArgs
+{
+    let TableInitializationArgs { table_name, create_table, overwrite_table } =
+        args.to_table_initialization_args();
+
+    // Missing table: create it if allowed.
+    if !is_table_exists(pool, &table_name).await {
+        if !create_table {
+            return Err(format!(
+                "Table {} doesn't exist. To create it, run the import tool with -c option.",
+                &table_name
+            ));
+        }
+
+        callback.await;
+        return Ok(());
+    }
+
+    // Existing table: usable as is when it has no rows.
+    if is_table_empty(pool, &table_name).await {
+        return Ok(());
+    }
+
+    if !overwrite_table {
+        return Err(format!(
+            "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+            &table_name
+        ));
+    }
+
+    truncate_table(pool, &table_name).await;
+    Ok(())
+}
+
pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
for query in queries.split(QUERIES_SEPARATOR) {
sqlx::query(&query)
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,11 +2,10 @@
use clap::{Args, Parser};
-use crate::commands::import::import;
-
mod commands;
mod db;
mod fantoir;
+mod sparql;
#[derive(Debug, Parser)]
#[command(name = "fantoir2db")]
@@ -15,6 +14,13 @@
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
+
+ /// Query Wikidata SPARQL end-point to enrich FANTOIR information
+ Wikidata(WikidataArgs),
+
+ /// Update foreign keys to the specified fantoir table
+ #[command(arg_required_else_help = true)]
+ UpdateForeignKeys(UpdateForeignKeysArgs),
}
#[derive(Debug, Args)]
@@ -35,6 +41,24 @@
fantoir_table: String,
}
+// Arguments carried by `FantoirCommand::Wikidata`.
+//
+// The `///` comments on the fields are presumably used by clap as help text
+// (clap derive behavior), so they are user-facing: edit them with care.
+//
+// NOTE(review): the help for `-t` says the command fails if the table exists,
+// but `initialize_table` also accepts an existing table when it is empty.
+#[derive(Debug, Args)]
+pub struct WikidataArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+}
+
+// Arguments carried by `FantoirCommand::UpdateForeignKeys`.
+// `fantoir_table` is handed to `commands::update_foreign_keys::update`.
+#[derive(Debug, Args)]
+pub struct UpdateForeignKeysArgs {
+ /// The name of the FANTOIR table to use
+ fantoir_table: String,
+}
+
#[tokio::main]
async fn main() {
let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
@@ -44,7 +68,13 @@
match command {
FantoirCommand::Import(args) => {
- import(&args, &database_url).await;
+ commands::import::import(&args, &database_url).await;
},
+ FantoirCommand::Wikidata(args) => {
+ commands::wikidata::import(&args, &database_url).await
+ }
+ FantoirCommand::UpdateForeignKeys(args) => {
+ commands::update_foreign_keys::update(&database_url, &args.fantoir_table).await;
+ }
};
}
diff --git a/src/queries/wikidata.sparql b/src/queries/wikidata.sparql
new file mode 100644
--- /dev/null
+++ b/src/queries/wikidata.sparql
@@ -0,0 +1,13 @@
+PREFIX bd: <http://www.bigdata.com/rdf#>
+PREFIX wikibase: <http://wikiba.se/ontology#>
+PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+
+# Streets with FANTOIR code
+SELECT DISTINCT ?code_fantoir ?item ?itemLabel ?what
+WHERE
+{
+ ?item wdt:P3182 ?code_fantoir .
+ ?item wdt:P31 ?what
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
+}
+LIMIT 3
diff --git a/src/schema/foreign_keys.sql b/src/schema/foreign_keys.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/foreign_keys.sql
@@ -0,0 +1,7 @@
+-- Foreign key from fantoir_wikidata.code_fantoir to the FANTOIR table.
+--
+-- The table placeholder in the `references` clause is replaced with the
+-- table name given to the update_foreign_keys command before execution.
+-- Instructions are separated by TWO blank lines.
+-- The constraint is dropped first so the script can be run again.
+alter table fantoir_wikidata
+ drop constraint if exists fantoir_wikidata_code_fantoir_fk;
+
+
+alter table fantoir_wikidata
+ add constraint fantoir_wikidata_code_fantoir_fk
+ foreign key (code_fantoir) references /*table*/fantoir (code_fantoir);
diff --git a/src/schema/wikidata.sql b/src/schema/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/wikidata.sql
@@ -0,0 +1,29 @@
+-- This table matches Wikidata entities and FANTOIR codes.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir_wikidata
+(
+ -- Identifiers
+ code_fantoir char(11) NOT NULL
+ constraint /*index*/index_fantoir_wikidata_pk
+ primary key,
+ code_fantoir_wikidata char(11) NOT NULL,
+
+ -- Wikidata information
+ item varchar(12) NOT NULL,
+ item_label text,
+ what varchar(12) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir_wikidata)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_wikidata_voie_trigram
+ ON /*table*/fantoir_wikidata
+ USING gin (item_label gin_trgm_ops);
diff --git a/src/sparql.rs b/src/sparql.rs
new file mode 100644
--- /dev/null
+++ b/src/sparql.rs
@@ -0,0 +1,64 @@
+//! # SPARQL client
+
+use std::env;
+use reqwest::{ClientBuilder, Url};
+use reqwest::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+
+/// Fallback User-Agent, built at compile time as `<package name>/<package version>`.
+/// Used by `Client::determine_user_agent` when the executable name can't be resolved.
+static DEFAULT_USER_AGENT: &str = concat!(
+ env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),
+);
+
+/// SPARQL client bound to a single endpoint.
+pub struct Client {
+ /// URL of the SPARQL endpoint, as given to `Client::new`.
+ pub endpoint: String,
+ /// HTTP client built in `Client::new` with the User-Agent and default headers.
+ client: HttpClient,
+}
+
+impl Client {
+    /// Creates a SPARQL client for `endpoint`, identifying itself with `user_agent`.
+    ///
+    /// The underlying HTTP client asks for SPARQL JSON results through the
+    /// `Accept` header and has gzip and deflate support turned on.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the HTTP client can't be built.
+    pub fn new (endpoint: &str, user_agent: &str) -> Self {
+        let mut headers = HeaderMap::new();
+        // The value holds the media type only: the header name is the key.
+        headers.insert("Accept", HeaderValue::from_static("application/sparql-results+json"));
+
+        let client = ClientBuilder::new()
+            .user_agent(user_agent)
+            .default_headers(headers)
+            .gzip(true)
+            .deflate(true)
+            .build()
+            .expect("Can't build HTTP client");
+
+        Self {
+            endpoint: String::from(endpoint),
+            client,
+        }
+    }
+
+    /// Creates a SPARQL client for `endpoint` with an automatically
+    /// determined User-Agent (see `determine_user_agent`).
+    pub fn with_default_user_agent(endpoint: &str) -> Self {
+        let user_agent = Self::determine_user_agent();
+
+        Self::new(endpoint, &user_agent)
+    }
+
+    /// Returns the file name of the current executable, or
+    /// `DEFAULT_USER_AGENT` when it can't be resolved or isn't valid Unicode.
+    fn determine_user_agent () -> String {
+        env::current_exe()
+            .ok()
+            .and_then(|path| path.file_name().map(|s| s.to_os_string()))
+            .and_then(|program_name| program_name.into_string().ok())
+            // Lazy fallback: only allocate the default when it's needed.
+            .unwrap_or_else(|| String::from(DEFAULT_USER_AGENT))
+    }
+
+    /// Sends `query` to the endpoint as the `query` URL parameter of a GET
+    /// request and prints the raw response body to stdout.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the endpoint isn't an absolute URL, if the request fails,
+    /// or if the response body can't be read.
+    pub async fn query (&self, query: &str) {
+        let url = Url::parse_with_params(&self.endpoint, &[("query", query)])
+            .expect("Can't parse endpoint as absolute URL.");
+
+        // Go through the configured client so the User-Agent, the Accept
+        // header and the compression settings apply to the request.
+        let result = self.client
+            .get(url)
+            .send()
+            .await
+            .expect("Can't query endpoint")
+            .text()
+            .await
+            .expect("End-point didn't return a reply.");
+
+        println!("{}", result);
+    }
+}

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 1, 14:20 (22 h, 22 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2168489
Default Alt Text
D2731.id6921.diff (12 KB)

Event Timeline