Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3768490
D2735.id6949.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Referenced Files
None
Subscribers
None
D2735.id6949.diff
View Options
diff --git a/src/commands/wikidata/mod.rs b/src/commands/wikidata/mod.rs
--- a/src/commands/wikidata/mod.rs
+++ b/src/commands/wikidata/mod.rs
@@ -1,15 +1,18 @@
//! Query Wikidata SPARQL end-point and import result into PostgreSQL
mod qualification;
+mod report;
use std::collections::HashMap;
use std::process::exit;
+
use oxrdf::Term;
use sqlx::PgPool;
+use crate::commands::wikidata::qualification::determine_p31_winner;
+use crate::commands::wikidata::report::*;
use crate::db::*;
use crate::WikidataArgs;
-use crate::commands::wikidata::qualification::determine_p31_winner;
use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
use crate::services::query::search_fantoir_code;
use crate::services::sparql::*;
@@ -56,18 +59,38 @@
// Consolidate entries and insert them into the database.
// To avoid an async closure, we don't use HOF pattern.
+ let mut maintenance_report = HashMap::new();
for (key, candidates) in what_map {
if let Some(entry) = WikidataEntry::consolidate_set(&pool, &key, candidates).await {
- entry.insert_to_db(&pool).await;
+ if let Err(error) = entry.insert_to_db(&pool).await {
+ if args.maintenance_report {
+ update_report(&mut maintenance_report, key, error);
+ } else {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", entry);
+ eprintln!("{}", error);
+ }
+ }
continue;
}
- eprintln!();
- eprintln!("Can't insert Wikidata information for the following entry:");
- eprintln!("{:?}", &key);
- eprintln!("Can't resolve FANTOIR code.");
+ if args.maintenance_report {
+ let entry = maintenance_report
+ .entry("Can't resolve FANTOIR code")
+ .or_insert(Vec::new());
+ entry.push(key);
+ } else {
+ eprintln!();
+ eprintln!("Can't insert Wikidata information for the following entry:");
+ eprintln!("{:?}", &key);
+ eprintln!("Can't resolve FANTOIR code.");
+ }
}
+ if args.maintenance_report {
+ print_maintenance_report(maintenance_report);
+ }
}
/* -------------------------------------------------------------
@@ -108,10 +131,10 @@
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
-struct WikidataEntryKey {
- code_fantoir_wikidata: String,
- item: String,
- item_label: String,
+pub struct WikidataEntryKey {
+ pub code_fantoir_wikidata: String,
+ pub item: String,
+ pub item_label: String,
}
impl WikidataEntryKey {
@@ -144,7 +167,7 @@
})
}
- async fn insert_to_db (&self, pool: &PgPool) {
+ async fn insert_to_db (&self, pool: &PgPool) -> Result<(), sqlx::Error> {
let mut query = format!("INSERT INTO {}", WIKIDATA_TABLE);
query.push_str(
r#"
@@ -153,7 +176,7 @@
($1, $2, $3, $4, $5)"#
);
- if let Err(error) = sqlx::query(&query)
+ sqlx::query(&query)
.bind(&self.code_fantoir)
.bind(&self.code_fantoir_wikidata)
.bind(&self.item)
@@ -161,12 +184,8 @@
.bind(&self.what)
.execute(pool)
- .await {
- eprintln!();
- eprintln!("Can't insert Wikidata information for the following entry:");
- eprintln!("{:?}", self);
- eprintln!("{}", error);
- }
+ .await
+ .map(|_result| ())
}
}
diff --git a/src/commands/wikidata/report.rs b/src/commands/wikidata/report.rs
new file mode 100644
--- /dev/null
+++ b/src/commands/wikidata/report.rs
@@ -0,0 +1,83 @@
+use std::cmp::Ordering;
+use std::collections::HashMap;
+
+use sqlx::Error;
+
+use crate::commands::wikidata::WikidataEntryKey;
+
+/// Maintenance report: maps a section title to the entries filed under it.
+type MaintenanceReport = HashMap<&'static str, Vec<WikidataEntryKey>>;
+
+/* -------------------------------------------------------------
+ Report update and wiki code
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Files `key` in the report section matching the kind of `error`.
+///
+/// Database errors are categorized by the violated constraint name when the
+/// driver reports one, otherwise by their SQLSTATE code. An error that isn't
+/// recognized is logged to stderr and filed under a generic section instead
+/// of panicking, so one unexpected failure can't abort the run and discard
+/// the report collected so far.
+pub fn update_report (maintenance_report: &mut MaintenanceReport, key: WikidataEntryKey, error: Error) {
+    let error_category = match &error {
+        Error::Database(db_error) => {
+            if let Some(index) = db_error.constraint() {
+                match index {
+                    "index_fantoir_wikidata_pk" => "Duplicate FANTOIR code",
+                    "fantoir_wikidata_code_fantoir_fk" => "Not in FANTOIR national file",
+                    _ => {
+                        eprintln!("Unknown constraint index: {}", index);
+
+                        "Other constraint violation"
+                    },
+                }
+            } else {
+                match db_error.code().as_deref() {
+                    // SQLSTATE 22001: string data right truncation.
+                    Some("22001") => "FANTOIR code too long",
+                    _ => {
+                        eprintln!("Unhandled database error: {}", error);
+
+                        "Other database error"
+                    },
+                }
+            }
+        },
+        _ => {
+            eprintln!("Unhandled error: {}", error);
+
+            "Other error"
+        },
+    };
+
+    maintenance_report
+        .entry(error_category)
+        .or_default()
+        .push(key);
+}
+
+/// Prints the maintenance report to stdout as wiki markup.
+///
+/// Each report category becomes a `== title ==` section holding a sortable
+/// wikitable of its entries, sorted using the ordering of `WikidataEntryKey`.
+/// Sections are emitted in alphabetical order of their title: `HashMap`
+/// iteration order is unspecified, and a stable order keeps successive
+/// reports comparable. A closing "Notes" section is always printed.
+pub fn print_maintenance_report (maintenance_report: MaintenanceReport) {
+    // Section titles are unique map keys, so an unstable sort is enough.
+    let mut sections: Vec<_> = maintenance_report.into_iter().collect();
+    sections.sort_unstable_by(|(left, _), (right, _)| left.cmp(right));
+
+    for (section_title, mut entries) in sections {
+        println!("== {} ==", section_title);
+        // `{{` and `}}` are escaped braces: the table opens with `{|` and closes with `|}`.
+        println!(r#"
+{{| class="wikitable sortable"
+|+ Items with issue
+|-
+! Item !! Item label in French !! FANTOIR code"#);
+
+        entries.sort();
+        for entry in entries {
+            println!(r#"|-
+| [[{}]] || {} || {}"#, &entry.item, &entry.item_label, &entry.code_fantoir_wikidata);
+        }
+
+        println!(r#"|}}"#);
+        println!();
+    }
+
+    println!("== Notes ==");
+    println!("This maintenance report has been generated automatically by fantoir-datasource tool, based on the issues encountered to cross-validate Wikidata entries and FANTOIR national file.");
+}
+
+/* -------------------------------------------------------------
+ Sort for report entries
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Partial ordering delegates to `Ord::cmp`, so both orderings always agree.
+impl PartialOrd for WikidataEntryKey {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+/// Orders keys by Wikidata FANTOIR code, then by item and item label.
+///
+/// The last two fields only break ties. They are compared so that `cmp`
+/// returns `Ordering::Equal` exactly when the derived `Eq` considers both
+/// keys equal, as the `Ord` contract requires.
+impl Ord for WikidataEntryKey {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.code_fantoir_wikidata.cmp(&other.code_fantoir_wikidata)
+            .then_with(|| self.item.cmp(&other.item))
+            .then_with(|| self.item_label.cmp(&other.item_label))
+    }
+}
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -62,6 +62,10 @@
/// If not specified, the script will fail if table exists.
#[arg(short = 't')]
overwrite_table: bool,
+
+ /// Generate a Wikidata maintenance report instead of printing errors to stderr
+ #[arg(long)]
+ maintenance_report: bool,
}
#[derive(Debug, Args)]
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Nov 24, 08:16 (9 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2259823
Default Alt Text
D2735.id6949.diff (6 KB)
Attached To
Mode
D2735: Generate Wikidata health report for FANTOIR code
Attached
Detach File
Event Timeline
Log In to Comment