Page MenuHomeDevCentral

D2737.id6941.diff
No OneTemporary

D2737.id6941.diff

diff --git a/README.md b/README.md
new file mode 100644
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# Helper development tools
+
+This repository contains tools to help development, like code generators.
+
+## Datasources
+
+### fantoir-datasource
+
+* **p31-winner** is a small Rust utility to regenerate P31_WINNERS vector,
+ used by the determine_p31_winner method to pick the most relevant Wikidata
+ "instance of" (P31) claim for path qualification.
+
+ See also: [p31-winner README](datasources/fantoir/p31-winner/README.md).
+
+## Sites
+
+Two generators for www.nasqueron.org content from sites.json:
+
+ * one for the mousetrap shortcuts
+ * one for the question mark popup content
diff --git a/datasources/fantoir/p31-winner/.gitignore b/datasources/fantoir/p31-winner/.gitignore
new file mode 100644
--- /dev/null
+++ b/datasources/fantoir/p31-winner/.gitignore
@@ -0,0 +1,7 @@
+# Rust
+/target
+Cargo.lock
+
+# Test data
+query.csv
+wikidata-import.log
diff --git a/datasources/fantoir/p31-winner/Cargo.toml b/datasources/fantoir/p31-winner/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/datasources/fantoir/p31-winner/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "p31-winner"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+enontekio = "0.2.0"
diff --git a/datasources/fantoir/p31-winner/README.md b/datasources/fantoir/p31-winner/README.md
new file mode 100644
--- /dev/null
+++ b/datasources/fantoir/p31-winner/README.md
@@ -0,0 +1,30 @@
+## P31_WINNERS vector generator
+
+This tool generates code for the `fantoir-datasource` repository.
+
+When running `fantoir-datasource wikidata`, Wikidata entities are imported
+as pairs of triplets for FANTOIR code and "instance of" (P31) properties.
+
+If an entity has several claims for the P31 property, arbitration is needed
+to pick the most relevant one, mainly to describe a pseudo-voie kind.
+
+When a P31 property matches several values, and all are unknown,
+the following message is emitted to stderr:
+
+`Can't determine P31 winner amongst ["Q174782", "Q62685721"], Q174782 is picked.`
+
+An update to the P31_WINNERS vector is then needed. This code will show you blocks
+of values for each entity claim.
+
+For example, for the message above:
+
+```
+ "Q174782", // place
+ "Q62685721", // rue piétonne
+```
+
+You can then order the values, the most relevant at the top,
+the least relevant at the bottom, and discard the irrelevant ones.
+
+That allows you to update the P31_WINNERS vector in `src/commands/wikidata/qualification.rs`.
+Generate
diff --git a/datasources/fantoir/p31-winner/src/main.rs b/datasources/fantoir/p31-winner/src/main.rs
new file mode 100644
--- /dev/null
+++ b/datasources/fantoir/p31-winner/src/main.rs
@@ -0,0 +1,102 @@
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+use std::process::exit;
+
+use enontekio::parser;
+
+/// Entry point: walks the user through the P31_WINNERS update workflow.
+///
+/// 1. Without `wikidata-import.log`, explains how to produce it and exits
+///    with code 1.
+/// 2. Without `query.csv`, prints the SPARQL query fetching the labels of
+///    the conflicting items and exits with code 2.
+/// 3. With both files, prints the labelled blocks of conflicting values.
+fn main() {
+    // The import log is the first input of the workflow: check for that file,
+    // not for the labels file, before trying to parse it.
+    if !Path::new("wikidata-import.log").is_file() {
+        println!("Run fantoir-datasource wikidata and save the stderr output to wikidata-import.log file.");
+        exit(1);
+    }
+
+    let choices = parse_import_log("wikidata-import.log");
+
+    // Labels are fetched manually by the user; without them, we can only
+    // print the query to run.
+    if !Path::new("query.csv").is_file() {
+        println!("Let's fetch the labels of the items we've to import.");
+        println!("Run this SPARQL query and save result to query.csv:");
+        println!();
+        print_choices_query(&choices);
+        exit(2);
+    }
+
+    print_choices_conflict_resolution(&choices);
+}
+
+/////////// Tasks
+
+/// Prints to stdout a SPARQL query selecting the French label of every
+/// distinct item found in `choices`.
+fn print_choices_query (choices: &HashSet<Vec<String>>) {
+    // Collecting into a set deduplicates items shared by several choices.
+    let prefixed_items: HashSet<String> = choices
+        .iter()
+        .flatten()
+        .map(|qid| format!("wd:{}", qid))
+        .collect();
+
+    println!("
+SELECT DISTINCT ?item ?itemLabel
+WHERE
+{{
+ VALUES ?item {{");
+
+    prefixed_items
+        .iter()
+        .for_each(|entry| println!(" {}", entry));
+
+    println!(r#" }}
+ SERVICE wikibase:label {{ bd:serviceParam wikibase:language "fr" . }}
+}}
+ "#);
+}
+
+/// Prints, for each conflicting set of P31 values, one line per item with the
+/// quoted item id followed by its label as a trailing `//` comment. Blocks
+/// are separated by a blank line.
+///
+/// Labels are read from `query.csv`.
+///
+/// # Panics
+///
+/// Panics if an item has no label in `query.csv`.
+fn print_choices_conflict_resolution (choices: &HashSet<Vec<String>>) {
+    let terms = parse_terms_file("query.csv");
+
+    for choice in choices {
+        for item in choice {
+            let label = terms
+                .get(item)
+                .unwrap_or_else(|| panic!("No label found for {} in query.csv", item));
+
+            // Pad ids to 12 characters so the comments line up. saturating_sub
+            // avoids the usize underflow a plain subtraction would hit for
+            // ids longer than 12 bytes.
+            let spaces = " ".repeat(12usize.saturating_sub(item.len()));
+            println!(" \"{}\",{}// {}", item, spaces, label);
+        }
+        println!();
+    }
+}
+
+/////// Parsers
+
+/// Reads the Wikidata import log and returns the distinct lists of P31
+/// values extracted by `parse_line`.
+///
+/// Lines for which `parse_line` returns `None` are skipped.
+///
+/// Panics if `parse_file_by_line` returns an error or hands an unreadable
+/// line to the callback.
+fn parse_import_log(filename: &str) -> HashSet<Vec<String>> {
+    let parsed_lines: Vec<Option<Vec<String>>> = parser::parse_file_by_line(
+        filename,
+        |line| parse_line(&line.unwrap()),
+    ).expect("Can't read Wikidata import log");
+
+    // Drop the None entries; collecting into a set removes duplicates.
+    parsed_lines.into_iter().flatten().collect()
+}
+
+/// Extracts the candidate P31 values from a "Can't determine P31 winner"
+/// log line.
+///
+/// Returns `None` when the line doesn't start with the expected message
+/// followed by the opening `[` of the list. Otherwise returns the listed
+/// item ids, without their quotes, sorted.
+///
+/// # Panics
+///
+/// Panics if the list is never closed by `]`.
+fn parse_line (expression: &str) -> Option<Vec<String>> {
+    // Stripping the prefix, opening bracket included, replaces a hard-coded
+    // byte offset that had to be kept in sync with the message length.
+    let list = expression.strip_prefix("Can't determine P31 winner amongst [")?;
+
+    let (items, _) = list
+        .split_once(']')
+        .expect("Expression should contain ] to close the list.");
+
+    let mut choices: Vec<_> = items
+        .split(", ")
+        .map(|item| item.replace('"', ""))
+        .collect();
+
+    // Sorted so the same values listed in a different order compare equal.
+    choices.sort();
+
+    Some(choices)
+}
+
+/// Reads the labels file and returns a map from item id to label.
+///
+/// Each line is expected to hold an entity URL (or bare id) and a label,
+/// separated by a comma. Only the first comma splits the line, so a label
+/// containing commas is kept whole.
+///
+/// # Panics
+///
+/// Panics if the file can't be parsed, if a line can't be read, or if a line
+/// contains no comma.
+fn parse_terms_file(filename: &str) -> HashMap<String, String> {
+    parser::parse_file_by_line(filename, |line| {
+        let line = line.unwrap();
+        let (entity, label) = line
+            .split_once(',')
+            .unwrap_or_else(|| panic!("Expected an entity and a label separated by a comma: {}", line));
+
+        // Keep only the item id when the entity is given as a full URL.
+        let id = entity
+            .strip_prefix("http://www.wikidata.org/entity/")
+            .unwrap_or(entity);
+
+        (id.to_string(), label.to_string())
+    }).unwrap()
+}

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 1, 12:23 (22 h, 5 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2168373
Default Alt Text
D2737.id6941.diff (5 KB)

Event Timeline