Page MenuHomeDevCentral

D2735.id6949.diff
No OneTemporary

D2735.id6949.diff

diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
--- a/src/commands/wikidata/mod.rs
+++ b/src/commands/wikidata/mod.rs
@@ -1,15 +1,18 @@
//! Query Wikidata SPARQL end-point and import result into PostgreSQL
mod qualification;
+mod report;
use std::collections::HashMap;
use std::process::exit;
+
use oxrdf::Term;
use sqlx::PgPool;
+use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::commands::wikidata::report::*;
use crate::db::*;
use crate::WikidataArgs;
-use crate::commands::wikidata::qualification::determine_p31_winner;
use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
use crate::services::query::search_fantoir_code;
use crate::services::sparql::*;
@@ -56,18 +59,38 @@
// Consolidate entries and insert them into the database.
// To avoid an async closure, we don't use HOF pattern.
+ let mut maintenance_report = HashMap::new();
for (key, candidates) in what_map {
if let Some(entry) = WikidataEntry::consolidate_set(&pool, &key, candidates).await {
- entry.insert_to_db(&pool).await;
+ if let Err(error) = entry.insert_to_db(&pool).await {
+ if args.maintenance_report {
+ update_report(&mut maintenance_report, key, error);
+ } else {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", entry);
+ eprintln!("{}", error);
+ }
+ }
continue;
}
- eprintln!();
- eprintln!("Can't insert Wikidata information for the following entry:");
- eprintln!("{:?}", &key);
- eprintln!("Can't resolve FANTOIR code.");
+ if args.maintenance_report {
+ let entry = maintenance_report
+ .entry("Can't resolve FANTOIR code")
+ .or_insert(Vec::new());
+ entry.push(key);
+ } else {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", &key);
+ eprintln!("Can't resolve FANTOIR code.");
+ }
}
+ if args.maintenance_report {
+ print_maintenance_report(maintenance_report);
+ }
}
/* -------------------------------------------------------------
@@ -108,10 +131,10 @@
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
-struct WikidataEntryKey {
- code_fantoir_wikidata: String,
- item: String,
- item_label: String,
+pub struct WikidataEntryKey {
+ pub code_fantoir_wikidata: String,
+ pub item: String,
+ pub item_label: String,
}
impl WikidataEntryKey {
@@ -144,7 +167,7 @@
})
}
- async fn insert_to_db (&self, pool: &PgPool) {
+ async fn insert_to_db (&self, pool: &PgPool) -> Result<(), sqlx::Error> {
let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
query.push_str(
r#"
@@ -153,7 +176,7 @@
($1, $2, $3, $4, $5)"#
);
- if let Err(error) = sqlx::query(&query)
+ sqlx::query(&query)
.bind(&self.code_fantoir)
.bind(&self.code_fantoir_wikidata)
.bind(&self.item)
@@ -161,12 +184,8 @@
.bind(&self.what)
.execute(pool)
- .await {
- eprintln!();
- eprintln!("Can't insert Wikidata information for the following entry:");
- eprintln!("{:?}", self);
- eprintln!("{}", error);
- }
+ .await
+ .map(|_result| ())
}
}
diff --git a/src/commands/wikidata/report.rs b/src/commands/wikidata/report.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/report.rs
@@ -0,0 +1,83 @@
+use std::cmp::Ordering;
+use std::collections::HashMap;
+
+use sqlx::Error;
+
+use crate::commands::wikidata::WikidataEntryKey;
+
+type MaintenanceReport = HashMap<&'static str, Vec<WikidataEntryKey>>;
+
+/* -------------------------------------------------------------
+ Report update and wiki code
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub fn update_report (maintenance_report: &mut MaintenanceReport, key: WikidataEntryKey, error: Error) {
+ let error_category = match error {
+ Error::Database(error) => {
+ if let Some(index) = error.constraint() {
+ match index {
+ "index_fantoir_wikidata_pk" => "Duplicate FANTOIR code",
+ "fantoir_wikidata_code_fantoir_fk" => "Not in FANTOIR national file",
+ _ => {
+ eprintln!("Unknown constraint index: {}", index);
+
+ unreachable!()
+ },
+ }
+ } else if let Some(code) = error.code() {
+ let code = code.to_string();
+ match code.as_str() {
+ "22001" => "FANTOIR code too long",
+ _ => unimplemented!(),
+ }
+ } else {
+ unimplemented!()
+ }
+ },
+ _ => unimplemented!(),
+ };
+
+ let entry = maintenance_report
+ .entry(error_category)
+ .or_insert(Vec::new());
+ entry.push(key);
+}
+
+/// Prints the maintenance report to stdout as wiki markup.
+///
+/// Each error category becomes a `== title ==` section containing a sortable
+/// table with one row per affected entry (item, label, FANTOIR code). A final
+/// "Notes" section states how the report has been generated.
+///
+/// Sections are printed in alphabetical order of their title, and rows are
+/// sorted within each section, so two runs over the same data produce the
+/// same page.
+pub fn print_maintenance_report (maintenance_report: MaintenanceReport) {
+    // HashMap iteration order is arbitrary: fix the order of the sections first.
+    let mut sections: Vec<_> = maintenance_report.into_iter().collect();
+    sections.sort_unstable_by_key(|(section_title, _)| *section_title);
+
+    for (section_title, mut entries) in sections {
+        println!("== {} ==", section_title);
+        println!(r#"
+{{| class="wikitable sortable"
+|+ Items with issue
+|-
+! Item !! Item label in French !! FANTOIR code"#);
+
+        entries.sort();
+        for entry in entries {
+            println!(r#"|-
+| [[{}]] || {} || {}"#, &entry.item, &entry.item_label, &entry.code_fantoir_wikidata);
+        }
+
+        println!(r#"|}}"#);
+        println!();
+    }
+
+    println!("== Notes ==");
+    println!("This maintenance report has been generated automatically by fantoir-datasource tool, based on the issues encountered to cross-validate Wikidata entries and FANTOIR national file.");
+}
+
+/* -------------------------------------------------------------
+ Sort for report entries
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+impl PartialOrd for WikidataEntryKey {
+    /// Defers to [`Ord::cmp`], so both orderings always agree.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for WikidataEntryKey {
+    /// Orders keys by FANTOIR code first, as the report is sorted by code.
+    ///
+    /// The item and its label are used as tie-breakers, so two keys compare
+    /// as equal only when all their fields are equal, as `Ord` requires to
+    /// stay consistent with `Eq`.
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.code_fantoir_wikidata.cmp(&other.code_fantoir_wikidata)
+            .then_with(|| self.item.cmp(&other.item))
+            .then_with(|| self.item_label.cmp(&other.item_label))
+    }
+}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -62,6 +62,10 @@
/// If not specified, the script will fail if table exists.
#[arg(short = 't')]
overwrite_table: bool,
+
+ /// Generate a Wikidata maintenance report instead of printing errors to stderr
+ #[arg(long)]
+ maintenance_report: bool,
}
#[derive(Debug, Args)]

File Metadata

Mime Type
text/plain
Expires
Sun, Nov 24, 08:16 (9 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2259823
Default Alt Text
D2735.id6949.diff (6 KB)

Event Timeline