F3909124: D2731.id6921.diff (12 KB)
D2731: Query Wikidata to enrich FANTOIR file
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,10 @@
version = "~4.0.32"
features = ["derive"]
+[dependencies.reqwest]
+version = "~0.11.13"
+features = ["gzip", "deflate"]
+
[dependencies.sqlx]
version = "~0.6.2"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/src/commands/import.rs b/src/commands/import.rs
--- a/src/commands/import.rs
+++ b/src/commands/import.rs
@@ -12,12 +12,33 @@
use crate::db::*;
use crate::fantoir::FantoirEntry;
+impl ToTableInitializationArgs for &ImportArgs {
+ fn to_table_initialization_args (&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: self.fantoir_table.clone(),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+async fn create_table(pool: &PgPool, table: &str) {
+ let queries = include_str!("../schema/fantoir.sql")
+ .replace("/*table*/fantoir", table)
+ .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
+
+ run_multiple_queries(pool, &queries).await;
+}
+
pub async fn import(args: &ImportArgs, database_url: &str) {
let fd = File::open(&args.fantoir_file).await.expect("Can't open file.");
let pool = connect_to_db(database_url).await;
// Create/truncate table as needed and as allowed by options
- if let Err(error) = initialize_table(args, &pool).await {
+ let callback = async {
+ create_table(&pool, &args.fantoir_table).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
eprintln!("{}", &error);
exit(1);
}
@@ -46,39 +67,3 @@
.await
}
}
-
-async fn initialize_table(args: &ImportArgs, pool: &PgPool) -> Result<(), String> {
- if is_table_exists(pool, &args.fantoir_table).await {
- if is_table_empty(&pool, &args.fantoir_table).await {
- return Ok(());
- }
-
- if args.overwrite_table {
- truncate_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- return Err(format!(
- "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
- &args.fantoir_table
- ));
- }
-
- if args.create_table {
- create_table(&pool, &args.fantoir_table).await;
- return Ok(());
- }
-
- Err(format!(
- "Table {} doesn't exist. To create it, run the import tool with -c option.",
- &args.fantoir_table
- ))
-}
-
-async fn create_table(pool: &PgPool, table: &str) {
- let queries = include_str!("../schema/fantoir.sql")
- .replace("/*table*/fantoir", table)
- .replace("/*index*/index_fantoir_", format!("index_{}_", table).as_ref());
-
- run_multiple_queries(pool, &queries).await;
-}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -1,3 +1,5 @@
//! Commands for the fantoir2db tool.
pub(crate) mod import;
+pub(crate) mod wikidata;
+pub(crate) mod update_foreign_keys;
diff --git a/src/commands/update_foreign_keys.rs b/src/commands/update_foreign_keys.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/update_foreign_keys.rs
@@ -0,0 +1,12 @@
+//! Update foreign key relations between tables
+
+use crate::db::*;
+
+pub async fn update (database_url: &str, fantoir_table: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ let queries = include_str!("../schema/foreign_keys.sql")
+ .replace("/*table*/fantoir", fantoir_table);
+
+ run_multiple_queries(&pool, &queries).await;
+}
diff --git a/src/commands/wikidata.rs b/src/commands/wikidata.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata.rs
@@ -0,0 +1,43 @@
+//! Query Wikidata SPARQL end-point and import results into PostgreSQL
+
+use std::process::exit;
+
+use crate::db::*;
+use crate::{sparql, WikidataArgs};
+
+static WIKIDATA_TABLE: &str = "fantoir_wikidata";
+
+impl ToTableInitializationArgs for &WikidataArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs {
+ TableInitializationArgs {
+ table_name: String::from(WIKIDATA_TABLE),
+ create_table: self.create_table,
+ overwrite_table: self.overwrite_table,
+ }
+ }
+}
+
+pub async fn import (args: &WikidataArgs, database_url: &str) {
+ let pool = connect_to_db(database_url).await;
+
+ // Create/truncate table as needed and as allowed by options
+ let callback = async {
+ let queries = include_str!("../schema/wikidata.sql");
+ run_multiple_queries(&pool, &queries).await;
+ };
+ if let Err(error) = initialize_table(&pool, callback, args).await {
+ eprintln!("{}", &error);
+ exit(1);
+ }
+
+ // Query Wikidata
+ let user_agent = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+
+ println!("User-Agent: {}", user_agent);
+ let client = sparql::Client::new("https://query.wikidata.org/sparql", &user_agent);
+ let entries = client.query(include_str!("../queries/wikidata.sparql"))
+ .await;
+}
diff --git a/src/db.rs b/src/db.rs
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,11 +3,22 @@
//! This module provides helpers to interact with a PostgreSQL database.
//! Functions expect to work with an executor from sqlx crate.
+use std::future::Future;
use sqlx::PgPool;
use sqlx::postgres::PgPoolOptions;
static QUERIES_SEPARATOR: &str = "\n\n\n";
+pub struct TableInitializationArgs {
+ pub table_name: String,
+ pub create_table: bool,
+ pub overwrite_table: bool,
+}
+
+pub trait ToTableInitializationArgs {
+ fn to_table_initialization_args(&self) -> TableInitializationArgs;
+}
+
pub async fn connect_to_db (database_url: &str) -> PgPool {
PgPoolOptions::new()
.max_connections(3)
@@ -61,6 +72,41 @@
.expect("Can't truncate table.");
}
+pub async fn initialize_table<F, T> (
+ pool: &PgPool,
+ callback: F,
+ args: T
+) -> Result<(), String>
+ where F: Future, T: ToTableInitializationArgs
+{
+ let args = args.to_table_initialization_args();
+ if is_table_exists(pool, &args.table_name).await {
+ if is_table_empty(&pool, &args.table_name).await {
+ return Ok(());
+ }
+
+ if args.overwrite_table {
+ truncate_table(&pool, &args.table_name).await;
+ return Ok(());
+ }
+
+ return Err(format!(
+ "Table {} already exists and contains rows. To overwrite it, run the import tool with -t option.",
+ &args.table_name
+ ));
+ }
+
+ if args.create_table {
+ callback.await;
+ return Ok(());
+ }
+
+ Err(format!(
+ "Table {} doesn't exist. To create it, run the import tool with -c option.",
+ &args.table_name
+ ))
+}
+
pub async fn run_multiple_queries(pool: &PgPool, queries: &str) {
for query in queries.split(QUERIES_SEPARATOR) {
sqlx::query(&query)
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,11 +2,10 @@
use clap::{Args, Parser};
-use crate::commands::import::import;
-
mod commands;
mod db;
mod fantoir;
+mod sparql;
#[derive(Debug, Parser)]
#[command(name = "fantoir2db")]
@@ -15,6 +14,13 @@
/// Import from FANTOIR file generated by the DGFIP
#[command(arg_required_else_help = true)]
Import(ImportArgs),
+
+ /// Query Wikidata SPARQL end-point to enrich FANTOIR information
+ Wikidata(WikidataArgs),
+
+ /// Update foreign keys to the specified fantoir table
+ #[command(arg_required_else_help = true)]
+ UpdateForeignKeys(UpdateForeignKeysArgs),
}
#[derive(Debug, Args)]
@@ -35,6 +41,24 @@
fantoir_table: String,
}
+#[derive(Debug, Args)]
+pub struct WikidataArgs {
+ /// Create table if it doesn't exist
+ #[arg(short = 'c')]
+ create_table: bool,
+
+ /// Truncate table if it already exists, allowing the overwrite mode.
+ /// If not specified, the script will fail if table exists.
+ #[arg(short = 't')]
+ overwrite_table: bool,
+}
+
+#[derive(Debug, Args)]
+pub struct UpdateForeignKeysArgs {
+ /// The name of the FANTOIR table to use
+ fantoir_table: String,
+}
+
#[tokio::main]
async fn main() {
let command = FantoirCommand::parse(); // Will exit if argument is missing or --help/--version provided.
@@ -44,7 +68,13 @@
match command {
FantoirCommand::Import(args) => {
- import(&args, &database_url).await;
+ commands::import::import(&args, &database_url).await;
},
+ FantoirCommand::Wikidata(args) => {
+ commands::wikidata::import(&args, &database_url).await
+ }
+ FantoirCommand::UpdateForeignKeys(args) => {
+ commands::update_foreign_keys::update(&database_url, &args.fantoir_table).await;
+ }
};
}
diff --git a/src/queries/wikidata.sparql b/src/queries/wikidata.sparql
new file mode 100644
--- /dev/null
+++ b/src/queries/wikidata.sparql
@@ -0,0 +1,13 @@
+PREFIX bd: <http://www.bigdata.com/rdf#>
+PREFIX wikibase: <http://wikiba.se/ontology#>
+PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+
+# Streets with FANTOIR code
+SELECT DISTINCT ?code_fantoir ?item ?itemLabel ?what
+WHERE
+{
+ ?item wdt:P3182 ?code_fantoir .
+ ?item wdt:P31 ?what
+ SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
+}
+LIMIT 3
diff --git a/src/schema/foreign_keys.sql b/src/schema/foreign_keys.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/foreign_keys.sql
@@ -0,0 +1,7 @@
+alter table fantoir_wikidata
+ drop constraint if exists fantoir_wikidata_code_fantoir_fk;
+
+
+alter table fantoir_wikidata
+ add constraint fantoir_wikidata_code_fantoir_fk
+ foreign key (code_fantoir) references /*table*/fantoir (code_fantoir);
diff --git a/src/schema/wikidata.sql b/src/schema/wikidata.sql
new file mode 100644
--- /dev/null
+++ b/src/schema/wikidata.sql
@@ -0,0 +1,29 @@
+-- This table matches Wikidata entities and FANTOIR codes.
+--
+-- If you provide several instructions, separate those with TWO blank lines.
+-- Indexes have to match every WHERE clause used against the database.
+--
+-- This schema is compiled as part of the program, as such you need to rebuild
+-- (`cargo build`) the project after any schema modification.
+
+CREATE TABLE IF NOT EXISTS /*table*/fantoir_wikidata
+(
+ -- Identifiers
+ code_fantoir char(11) NOT NULL
+ constraint /*index*/index_fantoir_wikidata_pk
+ primary key,
+ code_fantoir_wikidata char(11) NOT NULL,
+
+ -- Wikidata information
+ item varchar(12) NOT NULL,
+ item_label text,
+ what varchar(12) NOT NULL,
+
+ -- Constraints
+ UNIQUE (code_fantoir_wikidata)
+);
+
+
+CREATE INDEX CONCURRENTLY /*index*/index_fantoir_wikidata_voie_trigram
+ ON /*table*/fantoir_wikidata
+ USING gin (item_label gin_trgm_ops);
diff --git a/src/sparql.rs b/src/sparql.rs
new file mode 100644
--- /dev/null
+++ b/src/sparql.rs
@@ -0,0 +1,64 @@
+//! # SPARQL client
+
+use std::env;
+use reqwest::{ClientBuilder, Url};
+use reqwest::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+
+static DEFAULT_USER_AGENT: &str = concat!(
+ env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),
+);
+
+pub struct Client {
+ pub endpoint: String,
+ client: HttpClient,
+}
+
+impl Client {
+ pub fn new (endpoint: &str, user_agent: &str) -> Self {
+ let client = ClientBuilder::new()
+ .user_agent(user_agent)
+ .default_headers( {
+ let mut headers = HeaderMap::new();
+            headers.insert("Accept", HeaderValue::from_static("application/sparql-results+json"));
+ headers
+ })
+ .gzip(true)
+ .deflate(true)
+ .build()
+ .expect("Can't build HTTP client");
+
+ Self {
+ endpoint: String::from(endpoint),
+ client,
+ }
+ }
+
+ pub fn with_default_user_agent(endpoint: &str) -> Self {
+ let user_agent = Self::determine_user_agent();
+
+ Self::new(endpoint, &user_agent)
+ }
+
+ fn determine_user_agent () -> String {
+ env::current_exe()
+ .ok()
+ .and_then(|path| path.file_name().map(|s| s.to_os_string()))
+ .and_then(|program_name| program_name.into_string().ok())
+ .unwrap_or(String::from(DEFAULT_USER_AGENT))
+ }
+
+ pub async fn query (&self, query: &str) {
+ let url = Url::parse_with_params(&self.endpoint, &[("query", query)])
+ .expect("Can't parse endpoint as absolute URL.");
+
+        let result = self.client.get(url).send()
+ .await
+ .expect("Can't query endpoint")
+ .text()
+ .await
+ .expect("End-point didn't return a reply.");
+
+ println!("{}", result);
+ }
+}
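
Usage sketch for the two subcommands this revision adds (hypothetical invocations: clap derives kebab-case command names from the enum variants, -c creates the target table if it is missing, and the database connection is configured the same way as for the existing import command):

    fantoir2db wikidata -c
    fantoir2db update-foreign-keys <fantoir_table>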