Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3912398
D2735.id6938.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Referenced Files
None
Subscribers
None
D2735.id6938.diff
View Options
diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
--- a/src/commands/wikidata/mod.rs
+++ b/src/commands/wikidata/mod.rs
@@ -1,15 +1,18 @@
//! Query Wikidata SPARQL end-point and import result into PostgreSQL
mod qualification;
+mod report;
use std::collections::HashMap;
use std::process::exit;
use oxrdf::Term;
-use sqlx::PgPool;
+use sqlx::{Error, PgPool};
+use sqlx::error::DatabaseError;
use crate::db::*;
-use crate::{sparql, WikidataArgs};
+use crate::{main, sparql, WikidataArgs};
use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::commands::wikidata::report::print_maintenance_report;
use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
use crate::services::http_client::get_user_agent;
use crate::services::query::search_fantoir_code;
@@ -56,18 +59,72 @@
// Consolidate entries and insert them into the database.
// To avoid an async closure, we don't use HOF pattern.
+ let mut maintenance_report = HashMap::new();
for (key, candidates) in what_map {
if let Some(entry) = WikidataEntry::consolidate_set(&pool, &key, candidates).await {
- entry.insert_to_db(&pool).await;
+ if let Err(error) = entry.insert_to_db(&pool).await {
+ if args.maintenance_report {
+ let error_category = match error {
+ Error::Database(error) => {
+ if let Some(index) = error.constraint() {
+ match index {
+ "index_fantoir_wikidata_pk" => "Duplicate FANTOIR code",
+ "fantoir_wikidata_code_fantoir_fk" => "Not in FANTOIR national file",
+ _ => {
+ eprintln!("Unknown constraint index: {}", index);
+
+ unreachable!()
+ },
+ }
+ } else if let Some(code) = error.code() {
+ let code = code.to_string();
+ match code.as_str() {
+ "22001" => "FANTOIR code too long",
+ _ => {
+ eprintln!("{} {}", code, error.message());
+
+ "Misc database issues"
+ },
+ }
+ } else {
+ println!("{}", error.message());
+
+ "Database issue without error code"
+ }
+ },
+ _ => unimplemented!(),
+ };
+
+ let entry = maintenance_report
+ .entry(error_category)
+ .or_insert(Vec::new());
+ entry.push(key);
+ } else {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", entry);
+ eprintln!("{}", error);
+ }
+ }
continue;
}
- eprintln!();
- eprintln!("Can't insert Wikidata information for the following entry:");
- eprintln!("{:?}", &key);
- eprintln!("Can't resolve FANTOIR code.");
+ if args.maintenance_report {
+ let entry = maintenance_report
+ .entry("Can't resolve FANTOIR code")
+ .or_insert(Vec::new());
+ entry.push(key);
+ } else {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", &key);
+ eprintln!("Can't resolve FANTOIR code.");
+ }
}
+ if args.maintenance_report {
+ print_maintenance_report(maintenance_report);
+ }
}
/* -------------------------------------------------------------
@@ -108,10 +165,10 @@
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
-struct WikidataEntryKey {
- code_fantoir_wikidata: String,
- item: String,
- item_label: String,
+pub struct WikidataEntryKey {
+ pub code_fantoir_wikidata: String,
+ pub item: String,
+ pub item_label: String,
}
impl WikidataEntryKey {
@@ -144,7 +201,7 @@
})
}
- async fn insert_to_db (&self, pool: &PgPool) {
+ async fn insert_to_db (&self, pool: &PgPool) -> Result<(), sqlx::Error> {
let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
query.push_str(
r#"
@@ -153,7 +210,7 @@
($1, $2, $3, $4, $5)"#
);
- if let Err(error) = sqlx::query(&query)
+ sqlx::query(&query)
.bind(&self.code_fantoir)
.bind(&self.code_fantoir_wikidata)
.bind(&self.item)
@@ -161,12 +218,8 @@
.bind(&self.what)
.execute(pool)
- .await {
- eprintln!();
- eprintln!("Can't insert Wikidata information for the following entry:");
- eprintln!("{:?}", self);
- eprintln!("{}", error);
- }
+ .await
+ .map(|result| ())
}
}
diff --git a/src/commands/wikidata/report.rs b/src/commands/wikidata/report.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/report.rs
@@ -0,0 +1,46 @@
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use crate::commands::wikidata::WikidataEntryKey;
+
+/* -------------------------------------------------------------
+ Report wiki code
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub fn print_maintenance_report (maintenance_report: HashMap<&'static str, Vec<WikidataEntryKey>>) {
+ for (section_title, mut entries) in maintenance_report {
+ println!("== {} ==", section_title);
+ println!(r#"
+{{| class="wikitable sortable"
+|+ Items with issue
+|-
+! Item !! Item label in French !! FANTOIR code"#);
+
+ entries.sort();
+ for entry in entries {
+ println!(r#"|-
+| [[{}]] || {} || {}"#, &entry.item, &entry.item_label, &entry.code_fantoir_wikidata);
+ }
+
+ println!(r#"|}}"#);
+ println!();
+ }
+
+ println!("== Notes ==");
+ println!("This maintenance report has been generated automatically by fantoir-datasource tool, based on the issues encountered to cross-validate Wikidata entries and FANTOIR national file.");
+}
+
+/* -------------------------------------------------------------
+ Sort for report entries
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+impl PartialOrd for WikidataEntryKey {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for WikidataEntryKey {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.code_fantoir_wikidata.cmp(&other.code_fantoir_wikidata)
+ }
+}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -63,6 +63,10 @@
/// If not specified, the script will fail if table exists.
#[arg(short = 't')]
overwrite_table: bool,
+
+ /// Generate a Wikidata maintenance report instead of printing errors to stderr
+ #[arg(long)]
+ maintenance_report: bool,
}
#[derive(Debug, Args)]
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Dec 20, 05:45 (20 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2307566
Default Alt Text
D2735.id6938.diff (7 KB)
Attached To
Mode
D2735: Generate Wikidata health report for FANTOIR code
Attached
Detach File
Event Timeline
Log In to Comment