D2754.id8426.diff

diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,10 @@
+# Rust
/target
+Cargo.lock
+
+# Python
+__pycache__
+
+# Data
/FANTOIR*
!/fantoir-datasource
-Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,5 +2,7 @@
members = [
"fantoir-datasource",
+ "language-subtag-registry-datasource",
+ "rfc-datasource",
"opendatasoft-explore-api",
]
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -14,6 +14,23 @@
More information: [fantoir-datasource README](fantoir-datasource/README.md)
+### IANA language subtag registry (language-subtag-registry-datasource)
+
+Import the IANA language subtag registry, as defined by RFC 5646, and convert
+it to a specified text-based format.
+
+Can be used to refresh the language Darkbot database for IRC bots.
+
+More information: [language-subtag-registry-datasource README](language-subtag-registry-datasource/README.md)
+
+### RFC import tool (rfc-datasource)
+
+Import the RFC index and convert it to a specified text-based format.
+
+Can be used to refresh the RFC Darkbot database for IRC bots.
+
+More information: [rfc-datasource README](rfc-datasource/README.md)
+
### Opendatasoft Explore API client (opendatasoft-explore-api)
The opendatasoft-explore-api crate allows querying the Opendatasoft Explore API from Rust code.
diff --git a/_pipelines/README.md b/_pipelines/README.md
new file mode 100644
--- /dev/null
+++ b/_pipelines/README.md
@@ -0,0 +1,14 @@
+## Nasqueron Datasources :: pipelines
+
+The dags/ directory contains pipelines as code (DAGs) for Apache Airflow.
+
+Those pipelines can be used:
+
+ - at Nasqueron, on our Airflow instance
+ - elsewhere, as sample documentation on how to use our datasources
+   components and how to glue them together
+
+The nasqueron_datasources module is published to the dags folder too,
+so it is available from the different DAGs. It contains helper methods.
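+
+For example, a DAG task can invoke a datasource command through the `run`
+helper (the working directory below is illustrative):
+
+    from nasqueron_datasources.pipelines.commands import run
+
+    exit_code, stdout, stderr = run(
+        ["fantoir-datasource", "fetch"], cwd="/srv/datasources/fantoir"
+    )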
+
+Unit tests are available in the tests/ folder.
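+
+For example, assuming the dags/ directory is on the PYTHONPATH:
+
+    cd _pipelines/tests
+    PYTHONPATH=../dags python3 -m unittest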
diff --git a/_pipelines/dags/fantoir_fetch.py b/_pipelines/dags/fantoir_fetch.py
new file mode 100644
--- /dev/null
+++ b/_pipelines/dags/fantoir_fetch.py
@@ -0,0 +1,111 @@
+# -------------------------------------------------------------
+# Nasqueron Datasources :: pipelines
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Pipeline: Datasources > FANTOIR > fetch
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+from datetime import datetime
+import json
+import requests
+
+from airflow.decorators import dag, task
+from airflow.models import Variable, TaskInstance
+from airflow.operators.python import ShortCircuitOperator
+from airflow.operators.trigger_dagrun import TriggerDagRunOperator
+
+from nasqueron_datasources.pipelines.commands import run, parse_environment
+from nasqueron_datasources.pipelines.errors import CommandException, WorkflowException
+
+NOTIFICATION_URL = "https://notifications.nasqueron.org/gate/Notification/Nasqueron"
+
+
+@dag(
+    dag_id="fantoir_fetch",
+    schedule=None,
+    start_date=datetime(2023, 1, 1),
+    tags=["datasources", "fantoir", "download", "external"],
+    # Render templates as native objects, so call_import_dag passes the
+    # environment dict pulled from XCom as a dict, not as its string repr.
+    render_template_as_native_obj=True,
+)
+def fantoir_fetch_dag():
+ """
+ ### Pipeline for FANTOIR datasource - fetch
+
+    This pipeline checks if a new version of the FANTOIR file is published.
+
+    If so, it downloads it, extracts it and triggers the import DAG.
+
+ Reference: https://agora.nasqueron.org/Fantoir-datasource
+ """
+
+    # multiple_outputs pushes each key of the returned dict as its own XCom
+    # entry, so downstream tasks can pull "new_version" and "environment".
+    @task(multiple_outputs=True)
+ def fetch() -> dict:
+ """Fetches FANTOIR from data.economie.gouv.fr, if a new version is available."""
+ exit_code, stdout, stderr = run(
+ ["fantoir-datasource", "fetch"],
+ cwd=Variable.get("fantoir_directory"),
+ env={
+ "DATABASE_URL": "", # a value is unneeded for fetch operation
+ },
+ )
+
+ if exit_code == 12:
+ # No new version available
+ return {
+ "new_version": False,
+ "environment": {},
+ }
+
+ if exit_code != 0:
+ # Failure
+ raise CommandException("Can't fetch FANTOIR", exit_code, stderr)
+
+ return {
+ "new_version": True,
+ "environment": parse_environment(stdout),
+ }
+
+ def is_new_version_available(task_instance: TaskInstance) -> bool:
+ return task_instance.xcom_pull(task_ids="fetch", key="new_version")
+
+ check_fetch = ShortCircuitOperator(
+ task_id="check_fetch",
+ python_callable=is_new_version_available,
+ doc_md="""Determine if a new version is available from previous task.""",
+ )
+
+    call_import_dag = TriggerDagRunOperator(
+        task_id="call_import_dag",
+        trigger_dag_id="fantoir_import",
+        conf={
+            "fantoir_environment": "{{ task_instance.xcom_pull(task_ids='fetch', key='environment') }}"
+        },
+        doc_md="""Launch the workflow to import the new FANTOIR version.
+
+        Triggered by the fantoir_fetch DAG, as a new version is available.""",
+    )
+
+ @task
+ def notify(task_instance: TaskInstance):
+ """Sends a notification a new version is available."""
+
+ fantoir_file = task_instance.xcom_pull(task_ids="fetch", key="environment").get(
+ "FANTOIR_FILE", "(unknown)"
+ )
+        dag_run_id = task_instance.xcom_pull(
+            task_ids="call_import_dag", key="trigger_run_id"
+        )
+ notification = {
+ "service": "Airflow",
+ "project": "Nasqueron",
+ "group": "Datasources",
+ "type": "fantoir-fetch",
+ "text": f"A new version of FANTOIR has been fetched: {fantoir_file}. Triggering import workflow: {dag_run_id}.",
+ }
+
+ response = requests.post(NOTIFICATION_URL, data=json.dumps(notification))
+ if response.status_code != 200:
+ raise WorkflowException(
+ "Can't send notification: HTTP error " + str(response.status_code)
+ )
+
+ fetch() >> check_fetch >> call_import_dag >> notify()
diff --git a/_pipelines/dags/fantoir_import.py b/_pipelines/dags/fantoir_import.py
new file mode 100644
--- /dev/null
+++ b/_pipelines/dags/fantoir_import.py
@@ -0,0 +1,104 @@
+# -------------------------------------------------------------
+# Nasqueron Datasources :: pipelines
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Pipeline: Datasources > FANTOIR > import
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+from datetime import datetime
+
+from airflow.decorators import dag, task
+from airflow.models import Connection, Variable
+
+from nasqueron_datasources.pipelines.commands import run
+
+
+@dag(
+ dag_id="fantoir_import",
+ schedule=None,
+ start_date=datetime(2023, 1, 1),
+ tags=["datasources", "fantoir", "postgresql", "external"],
+)
+def fantoir_import_dag():
+ """
+ ### Pipeline for FANTOIR datasource - import
+
+ This pipeline imports FANTOIR into PostgreSQL, enriches it
+ and promotes the new table as the one to use.
+
+ Enrichment is done by fetching information from:
+ - Wikidata
+
+ Reference: https://agora.nasqueron.org/Fantoir-datasource
+ """
+
+ fantoir_directory = Variable.get("fantoir_directory")
+ database_url = Connection.get_connection_from_secrets("postgresql_fantoir").get_uri()
+
+    @task
+    def import_to_pgsql(dag_run=None):
+        # Jinja templates aren't rendered inside a @task body, so read the
+        # environment directly from the conf sent by the fantoir_fetch DAG.
+        fantoir_environment = dag_run.conf.get("fantoir_environment", {})
+        run(
+            [
+                "fantoir-datasource",
+                "import",
+                fantoir_environment.get("FANTOIR_FILE", ""),
+                fantoir_environment.get("FANTOIR_TABLE", ""),
+            ],
+            cwd=fantoir_directory,
+            env={
+                "DATABASE_URL": database_url,
+            },
+        )
+
+ @task
+ def enrich_from_wikidata():
+ run(
+ ["fantoir-datasource", "wikidata"],
+ cwd=fantoir_directory,
+ env={
+ "DATABASE_URL": database_url,
+ },
+ )
+
+ @task
+ def promote():
+ run(
+ ["fantoir-datasource", "promote"],
+ cwd=fantoir_directory,
+ env={
+ "DATABASE_URL": database_url,
+ },
+ )
+
+ @task
+ def publish_to_configuration():
+ """
+ NOT IMPLEMENTED.
+
+ Publish new table name to use to etcd/consul
+ """
+ pass
+
+ @task
+ def notify():
+ """
+ NOT IMPLEMENTED.
+
+ Send notification payload to Notifications Center
+ """
+ pass
+
+    (
+        import_to_pgsql()
+        >> [
+            # Enrichment sources can run in parallel.
+            enrich_from_wikidata(),
+        ]
+        >> promote()
+        >> [
+            # Post-action tasks can run in parallel too.
+            publish_to_configuration(),
+            notify(),
+        ]
+    )
diff --git a/_pipelines/dags/nasqueron_datasources/__init__.py b/_pipelines/dags/nasqueron_datasources/__init__.py
new file mode 100644
diff --git a/_pipelines/dags/nasqueron_datasources/pipelines/__init__.py b/_pipelines/dags/nasqueron_datasources/pipelines/__init__.py
new file mode 100644
diff --git a/_pipelines/dags/nasqueron_datasources/pipelines/commands.py b/_pipelines/dags/nasqueron_datasources/pipelines/commands.py
new file mode 100644
--- /dev/null
+++ b/_pipelines/dags/nasqueron_datasources/pipelines/commands.py
@@ -0,0 +1,57 @@
+# -------------------------------------------------------------
+# Nasqueron Datasources :: pipelines :: command utilities
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# Description: Helpers to handle commands in Python pipelines
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+
+import os
+import subprocess
+
+
+# -------------------------------------------------------------
+# Subprocess wrappers
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+def run(command, cwd=None, env=None):
+ """
+ Runs the specified command and return exit_code, stdout, stderr.
+
+ :type env: dict|None
+ :param env: The environment variables to pass to the software
+ :type command: string|list
+ :param command: The command to run, as a string to pass to shell (to avoid) or a list [command, arg1, arg2, ...]
+ :param cwd: The working directory for the command to run
+
+ :return: (exit_code, stdout, stderr)
+ """
+    if env is None:
+        env = {}
+
+    # Merge the current environment with the explicit variables, so PATH
+    # stays available to resolve the command.
+    env = {**os.environ, **env}
+
+    shell = isinstance(command, str)
+    process = subprocess.run(
+        command, shell=shell, cwd=cwd, env=env, capture_output=True, text=True
+    )
+
+ return process.returncode, process.stdout, process.stderr
+
+
+# -------------------------------------------------------------
+# Environment
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+def parse_environment(environment_lines):
+ """
+ Parses environment as a dictionary.
+
+ This method is intended to be used with `env`, with .env files,
+ or with any command offering a similar format:
+
+ VARIABLE=value
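+
+    Example:
+
+        >>> parse_environment(["FOO=bar", "QUUX=666"])
+        {'FOO': 'bar', 'QUUX': '666'}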
+ """
+    return {
+        parts[0]: parts[1]
+        for parts in [
+            line.strip().split("=", 1) for line in environment_lines if "=" in line
+        ]
+    }
diff --git a/_pipelines/dags/nasqueron_datasources/pipelines/errors.py b/_pipelines/dags/nasqueron_datasources/pipelines/errors.py
new file mode 100644
--- /dev/null
+++ b/_pipelines/dags/nasqueron_datasources/pipelines/errors.py
@@ -0,0 +1,19 @@
+# -------------------------------------------------------------
+# Nasqueron Datasources :: pipelines :: errors
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+
+class WorkflowException(Exception):
+    def __init__(self, message):
+        super().__init__(message)
+
+
+class CommandException(WorkflowException):
+    def __init__(self, message, exit_code, stderr):
+        consolidated_message = "{} (exit code {}): {}".format(
+            message, exit_code, stderr
+        )
+        super().__init__(consolidated_message)
diff --git a/_pipelines/requirements.txt b/_pipelines/requirements.txt
new file mode 100644
--- /dev/null
+++ b/_pipelines/requirements.txt
@@ -0,0 +1,2 @@
+apache-airflow~=2.8.0
+requests~=2.28.2
diff --git a/_pipelines/tests/files/env b/_pipelines/tests/files/env
new file mode 100644
--- /dev/null
+++ b/_pipelines/tests/files/env
@@ -0,0 +1,3 @@
+FOO=This is a sentence.
+QUUX=666
+BAR=
diff --git a/_pipelines/tests/test_commands.py b/_pipelines/tests/test_commands.py
new file mode 100644
--- /dev/null
+++ b/_pipelines/tests/test_commands.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+
+# -------------------------------------------------------------
+# Nasqueron Datasources :: pipelines :: Tests
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+# Project: Nasqueron
+# License: BSD-2-Clause
+# -------------------------------------------------------------
+
+import os
+import unittest
+
+from nasqueron_datasources.pipelines import commands
+
+ENV_FILE = os.path.join(os.path.dirname(__file__), "files", "env")
+
+
+class TestCommands(unittest.TestCase):
+    def test_parse_environment(self):
+        expected = {
+            "FOO": "This is a sentence.",
+            "QUUX": "666",  # everything is parsed as a string
+            "BAR": "",  # an empty string is used instead of None for empty values
+        }
+
+        # Resolve the fixture relative to this file, so the test can run
+        # from any working directory.
+        with open(ENV_FILE) as fd:
+            self.assertDictEqual(expected, commands.parse_environment(fd))
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -13,7 +13,7 @@
lazy_static = "~1.4.0"
opendatasoft-explore-api = { version = "0.1.0", path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.1"
-regex = "~1.7.1"
+regex = "~1.8.1"
sparesults = "~0.1.3"
[dependencies.async-scoped]
@@ -21,11 +21,11 @@
features = ["use-tokio"]
[dependencies.clap]
-version = "~4.0.32"
+version = "~4.3.0"
features = ["derive"]
[dependencies.reqwest]
-version = "~0.11.13"
+version = "~0.11.18"
features = ["gzip", "deflate"]
[dependencies.sqlx]
@@ -33,5 +33,5 @@
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
[dependencies.tokio]
-version = "~1.23.0"
+version = "~1.28.1"
features = ["full"]
diff --git a/language-subtag-registry-datasource/Cargo.toml b/language-subtag-registry-datasource/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/language-subtag-registry-datasource/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "language-subtag-registry-datasource"
+version = "0.1.0"
+edition = "2021"
+description = "Downloads and transforms IANA language subtag registry"
+authors = [
+ "Sébastien Santoro <dereckson@espace-win.org>"
+]
+license = "BSD-2-Clause"
+
+[dependencies]
+lazy_static = "1.4.0"
+regex = "1.8.1"
+
+[dependencies.clap]
+version = "4.3.0"
+features = ["derive"]
+
+[dependencies.reqwest]
+version = "~0.11.18"
+features = ["gzip", "deflate"]
+
+[dependencies.tokio]
+version = "1.28.1"
+features = ["full"]
diff --git a/language-subtag-registry-datasource/README.md b/language-subtag-registry-datasource/README.md
new file mode 100644
--- /dev/null
+++ b/language-subtag-registry-datasource/README.md
@@ -0,0 +1,65 @@
+The `language-subtag-registry-datasource` utility allows you to download the
+IANA language subtag registry defined in RFC 5646, parse it, and transform
+the output.
+
+This registry shares language codes with the different ISO 639 lists,
+but is more inclusive and descriptive.
+
+It has been designed to output the registry in an arbitrary format,
+so we can export a Darkbot database for Odderon, one of our IRC bots.
+
+## Usage
+
+```
+language-subtag-registry-datasource
+ --format <format string>
+ [--languages-only]
+ [--aggregation-separator <separator string>]
+    [--source /path/to/registry.txt]
+```
+
+The format string can be arbitrary text or variables:
+
+| **Variable**        | **Description**                                                       |
+|---------------------|-----------------------------------------------------------------------|
+| %%id%%              | The Tag or Subtag field of the entry                                  |
+| %%<key>%%           | A field in the registry entry                                         |
+| %%fulldescription%% | A string built from the description, deprecation status and comments  |
+
+If an entry doesn't have the requested field, it is left blank.
+
+Examples for the variables:
+ - `%%Description%%` will output `Inupiaq` for the `ik` subtag
+ - `%%Description%%` will output `Sichuan Yi / Nuosu` for the `ii` subtag
+ - `%%Comments%%` will output an empty string for both `ik` and `ii` subtags
+ - `%%fulldescription%%` will output "Serbo-Croatian - sr, hr, bs are preferred for most modern uses" for `sh`
+
+If a field has several values, they are coalesced with a separator string.
+The default separator is " / ". It can be overridden with
+`--aggregation-separator`.
+
+The registry contains entries of other types than languages, like variants, regions or redundant tags.
+To parse only languages, use the `-l` or `--languages-only` flag.
+
+The utility uses as its source, in order of priority:
+ - the path specified to the `--source` argument
+ - any `registry.txt` file available in the current directory
+ - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+
+## Recipes
+
+### Darkbot database
+
+ language-subtag-registry-datasource -l --format "lang+%%id%% %%fulldescription%%"
+
+### CSV export
+
+Identify the fields and the order you wish to use.
+
+For example, to create a CSV with the following header:
+
+ Type,Subtag,Tag,Added,Suppress-Script,Preferred-Value,Comments,Scope,Macrolanguage,Deprecated,Description
+
+Use:
+
+ language-subtag-registry-datasource --format '"%%Type%%","%%Subtag%%","%%Tag%%","%%Added%%","%%Suppress-Script%%","%%Preferred-Value%%","%%Comments%%","%%Scope%%","%%Macrolanguage%%","%%Deprecated%%","%%Description%%"'
diff --git a/language-subtag-registry-datasource/src/language_parser.rs b/language-subtag-registry-datasource/src/language_parser.rs
new file mode 100644
--- /dev/null
+++ b/language-subtag-registry-datasource/src/language_parser.rs
@@ -0,0 +1,179 @@
+use std::collections::HashMap;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+/* -------------------------------------------------------------
+ Regexp definitions, used in builder
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static! {
+ static ref RE_KEY: Regex = Regex::new(
+ // %%key%%
+ r"%%(.*?)%%"
+ ).unwrap();
+}
+
+/* -------------------------------------------------------------
+ Language
+
+ Each language entry from the registry is a key/value map.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Debug)]
+pub struct Language {
+ pub fields: HashMap<String, Vec<String>>,
+}
+
+impl Language {
+
+ ///
+ /// Parser
+ ///
+
+ pub fn parse_document(document: &str, restrict_to_language: bool) -> Vec<Self> {
+ document
+ .split("\n%%\n")
+ .skip(1) // Metadata File-Date: <date>
+ .filter(|&entry| !restrict_to_language || entry.contains("Type: language"))
+ .map(|entry| Self::parse_entry(entry))
+ .collect()
+ }
+
+ pub fn parse_entry(entry: &str) -> Self {
+ let mut fields = HashMap::new();
+
+ let mut key = String::new();
+        let mut value = String::new();
+ let mut has_value = false;
+
+ // Pitfall: some values can extend to several lines
+ for line in entry.split("\n") {
+ if line.contains(": ") {
+ // Save previous value
+ if has_value {
+ fields
+ .entry(key)
+ .or_insert(Vec::new())
+ .push(value);
+ }
+
+ // <key>: <value> line
+ let mut tokens = line.splitn(2, ": ");
+ key = String::from(tokens.next().unwrap());
+ value = String::from(tokens.next().unwrap());
+ has_value = true;
+ } else {
+ // Multiline value. Append the line to previous value.
+ value = format!("{} {}", &value.trim(), line.trim())
+ }
+ }
+ if has_value {
+ fields
+ .entry(key)
+ .or_insert(Vec::new())
+ .push(value);
+ }
+
+ Self {
+ fields,
+ }
+ }
+
+ ///
+ /// Builder
+ ///
+
+ pub fn get_field(&self, tag: &str, separator: &str) -> Option<String> {
+ self.fields
+ .get(tag)
+ .map(|values| values.join(separator))
+ }
+
+ pub fn get_id(&self) -> Option<String> {
+ self.get_field("Subtag", "-")
+ .or_else(|| self.get_field("Tag", "-"))
+ }
+
+ pub fn build_full_description(&self, separator: &str) -> String {
+ let mut full_description = self.get_field("Description", separator)
+ .unwrap_or("<no description in IANA registry>".to_string());
+
+ if self.fields.contains_key("Deprecated") {
+ full_description.push_str(" [deprecated]");
+ }
+
+        if let Some(should_use) = self.get_field("Preferred-Value", separator) {
+            full_description.push_str("; preferred value: ");
+            full_description.push_str(&should_use);
+        }
+
+        if let Some(comments) = self.get_field("Comments", separator) {
+            full_description.push_str("; ");
+            full_description.push_str(&comments);
+        }
+
+        full_description
+    }
+
+ pub fn format(&self, format: &str, separator: &str) -> String {
+ let mut formatted = String::from(format);
+
+ if formatted.contains("%%id%%") {
+ let id = self.get_id().unwrap_or(String::new());
+ formatted = formatted.replace("%%id%%", &id);
+ }
+
+ if formatted.contains("%%fulldescription%%") {
+ let description = self.build_full_description(separator);
+ formatted = formatted.replace("%%fulldescription%%", &description);
+ }
+
+        for (key, values) in &self.fields {
+ let value = values.join(separator);
+
+ formatted = formatted.replace(
+ &format!("%%{}%%", &key),
+ &value
+ );
+ }
+
+ RE_KEY
+ .replace_all(&formatted, "")
+ .to_string()
+ }
+
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ pub fn test_format() {
+ let liquids = vec!["Water".to_string(), "Air".to_string()];
+
+ let mut fields = HashMap::new();
+ fields.insert("Liquid".to_string(), liquids);
+ fields.insert("Model".to_string(), vec!["Newtonian".to_string()]);
+
+ let language = Language { fields };
+
+ assert_eq!(
+ "Water or Air use Newtonian physic.",
+ &language.format("%%Liquid%% use %%Model%% physic.", " or ")
+ );
+
+ assert_eq!(
+ "Water or Air use Newtonian physic.",
+ &language.format("%%Liquid%% use %%Prefix%%%%Model%% physic.", " or ")
+ );
+
+ assert_eq!(
+ "", &language.format("", "")
+ );
+ }
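+
+    #[test]
+    pub fn test_parse_entry() {
+        // A minimal sketch of a registry entry, in the style of the IANA
+        // file; the ik/Inupiaq values match the README examples.
+        let entry = "Type: language\nSubtag: ik\nDescription: Inupiaq";
+
+        let language = Language::parse_entry(entry);
+
+        assert_eq!(
+            Some("Inupiaq".to_string()),
+            language.get_field("Description", " / ")
+        );
+        assert_eq!(Some("ik".to_string()), language.get_id());
+    }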
+}
diff --git a/language-subtag-registry-datasource/src/main.rs b/language-subtag-registry-datasource/src/main.rs
new file mode 100644
--- /dev/null
+++ b/language-subtag-registry-datasource/src/main.rs
@@ -0,0 +1,40 @@
+use clap::Parser;
+
+use crate::registry::get_registry;
+use crate::language_parser::Language;
+
+mod registry;
+mod language_parser;
+
+#[derive(Debug, Parser)]
+#[command(name = "language-subtag-registry-datasource")]
+#[clap(author="Nasqueron project", version, about="Download and print language subtag registry", long_about=None)]
+pub struct Args {
+ /// The format string to use
+ #[arg(long, short = 'f')]
+ format: String,
+
+ /// The aggregation separator
+ #[arg(long, short = 'a', default_value = " / ")]
+ aggregation_separator: String,
+
+ /// The path to the registry source
+ #[arg(long, short = 's')]
+ source: Option<String>,
+
+ /// Restricts parsing to language type
+ #[arg(long, short = 'l', default_value_t = false)]
+ languages_only: bool,
+}
+
+#[tokio::main]
+async fn main() {
+    let args = Args::parse(); // Exits if an argument is missing or --help/--version is provided.
+
+ let document = get_registry(args.source).await
+ .expect("Can't read or fetch registry");
+
+ for language in Language::parse_document(&document, args.languages_only) {
+ println!("{}", language.format(&args.format, &args.aggregation_separator));
+ }
+}
diff --git a/language-subtag-registry-datasource/src/registry.rs b/language-subtag-registry-datasource/src/registry.rs
new file mode 100644
--- /dev/null
+++ b/language-subtag-registry-datasource/src/registry.rs
@@ -0,0 +1,60 @@
+use std::error::Error;
+use std::fs;
+use std::path::Path;
+
+use reqwest::ClientBuilder;
+
+static REGISTRY_URL: &str = "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry";
+
+/* -------------------------------------------------------------
+ User agent
+
+   The USER_AGENT string is built from compile-time crate metadata.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static::lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+pub fn get_user_agent() -> &'static str {
+ &USER_AGENT
+}
+
+/* -------------------------------------------------------------
+ Read or fetch registry
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+pub async fn get_registry(source: Option<String>) -> Result<String, Box<dyn Error>> {
+ match source {
+ // Case 1 - A source file has been explicitly set
+        Some(file) => Ok(fs::read_to_string(&file)?),
+
+ None => {
+ if Path::new("registry.txt").exists() {
+ // Case 2 - The file registry.txt can be found locally
+ Ok(fs::read_to_string("registry.txt")?.parse()?)
+ } else {
+ // Case 3 - Fetch the index remotely
+ Ok(fetch_registry().await?)
+ }
+ }
+ }
+}
+
+async fn fetch_registry() -> Result<String, Box<dyn Error>> {
+ let client = ClientBuilder::new()
+ .user_agent(get_user_agent())
+ .gzip(true)
+ .deflate(true)
+ .build()
+ .expect("Can't build HTTP client");
+
+ let body = client.get(REGISTRY_URL)
+ .send().await?
+ .text().await?;
+
+ Ok(body)
+}
diff --git a/opendatasoft-explore-api/Cargo.toml b/opendatasoft-explore-api/Cargo.toml
--- a/opendatasoft-explore-api/Cargo.toml
+++ b/opendatasoft-explore-api/Cargo.toml
@@ -16,14 +16,17 @@
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
-bytes = "~1.3.0" # Keep in sync with reqwest
+bytes = "~1.4.0" # Keep in sync with reqwest
chrono = { version = "~0.4", features = ["serde"] }
-reqwest = {version = "~0.11.13" }
+reqwest = {version = "~0.11.18" }
serde = "~1.0.152"
serde_derive = "~1.0.152"
serde_json = "~1.0.91"
[dev-dependencies]
-mockito = "~0.31.1"
+mockito = "~1.1.0"
lazy_static = "~1.4.0"
-tokio = { version = "~1.23.0", features = ["macros", "rt"] }
+
+[dependencies.tokio]
+version = "~1.28.1"
+features = ["macros", "rt"]
diff --git a/opendatasoft-explore-api/tests/requests_test.rs b/opendatasoft-explore-api/tests/requests_test.rs
--- a/opendatasoft-explore-api/tests/requests_test.rs
+++ b/opendatasoft-explore-api/tests/requests_test.rs
@@ -3,7 +3,7 @@
use std::collections::HashMap;
use lazy_static::lazy_static;
-use mockito::{mock, Mock};
+use mockito::{Server, ServerGuard};
use serde_json::json;
use opendatasoft_explore_api::requests::ExploreApiEndPoint;
@@ -16,49 +16,78 @@
static TEST_RECORD_ID: &'static str = "eb04cba18e872814448a7fda829f3f1918cfae0b";
lazy_static! {
- static ref MOCK_URL: String = mockito::server_url();
-
static ref MOCK_FILES: HashMap<&'static str, &'static str> = {
let mut m = HashMap::new();
- m.insert("/catalog/datasets", include_str!("requests/catalog_datasets.json"));
- m.insert("/catalog/facets", include_str!("requests/catalog_facets.json"));
- m.insert("/catalog/exports/rdf", include_str!("requests/catalog_exports.rdf"));
- m.insert("/catalog/datasets/controle_techn/records", include_str!("requests/catalog_datasets_records.json"));
- m.insert("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits", include_str!("requests/catalog_dataset_fantoir.json"));
- m.insert("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/attachments", include_str!("requests/catalog_dataset_fantoir_attachments.json"));
- m.insert("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/facets", include_str!("requests/catalog_dataset_fantoir_facets.json"));
- m.insert("/catalog/datasets/controle_techn/records/eb04cba18e872814448a7fda829f3f1918cfae0b", include_str!("requests/catalog_dataset_record.json"));
+ m.insert(
+ "/catalog/datasets",
+ include_str!("requests/catalog_datasets.json"),
+ );
+ m.insert(
+ "/catalog/facets",
+ include_str!("requests/catalog_facets.json"),
+ );
+ m.insert(
+ "/catalog/exports/rdf",
+ include_str!("requests/catalog_exports.rdf"),
+ );
+ m.insert(
+ "/catalog/datasets/controle_techn/records",
+ include_str!("requests/catalog_datasets_records.json"),
+ );
+ m.insert(
+ "/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits",
+ include_str!("requests/catalog_dataset_fantoir.json"),
+ );
+ m.insert(
+ "/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/attachments",
+ include_str!("requests/catalog_dataset_fantoir_attachments.json"),
+ );
+ m.insert(
+ "/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/facets",
+ include_str!("requests/catalog_dataset_fantoir_facets.json"),
+ );
+ m.insert(
+ "/catalog/datasets/controle_techn/records/eb04cba18e872814448a7fda829f3f1918cfae0b",
+ include_str!("requests/catalog_dataset_record.json"),
+ );
m
};
}
-pub fn prepare_mock (url: &str) -> Mock {
- mock("GET", url)
- .with_status(200)
+pub async fn prepare_mock(url: &str) -> ServerGuard {
+ let mut server = Server::new_async().await;
+ server
+ .mock("GET", url)
.with_body(MOCK_FILES[url])
- .create()
+ .create_async()
+ .await;
+ server
}
#[tokio::test]
-async fn test_get_datasets () {
- let _mock = prepare_mock("/catalog/datasets");
+async fn test_get_datasets() {
+ let server = prepare_mock("/catalog/datasets").await;
- let endpoint = ExploreApiEndPoint::new(&MOCK_URL);
+ let endpoint = ExploreApiEndPoint::new(&server.url());
let catalog = endpoint.get_datasets().await;
assert_eq!(426, catalog.total_count);
- assert_eq!(Link {
- href: "https://data.economie.gouv.fr/api/v2/catalog/datasets/mef-catalogue-temporaire".to_string(),
- rel: "self".to_string(),
- }, catalog.datasets[0].links[0]);
+ assert_eq!(
+ Link {
+ href: "https://data.economie.gouv.fr/api/v2/catalog/datasets/mef-catalogue-temporaire"
+ .to_string(),
+ rel: "self".to_string(),
+ },
+ catalog.datasets[0].links[0]
+ );
assert_eq!(3, catalog.datasets.len());
}
#[tokio::test]
-async fn test_export_datasets_catalog () {
- let _mock = prepare_mock("/catalog/exports/rdf");
+async fn test_export_datasets_catalog() {
+ let server = prepare_mock("/catalog/exports/rdf").await;
- let mut response = ExploreApiEndPoint::new(&MOCK_URL)
+ let mut response = ExploreApiEndPoint::new(&server.url())
.export_datasets_catalog("rdf")
.await;
@@ -75,32 +104,32 @@
}
#[tokio::test]
-async fn test_get_facets () {
- let _mock = prepare_mock("/catalog/facets");
+async fn test_get_facets() {
+ let server = prepare_mock("/catalog/facets").await;
- let endpoint = ExploreApiEndPoint::new(&MOCK_URL);
+ let endpoint = ExploreApiEndPoint::new(&server.url());
let facets = endpoint.get_facets().await;
assert!(facets.links[0].href.starts_with(TEST_URL));
let expected_facets_categories = vec![
- "features".to_string(), "modified".to_string(),
- "publisher".to_string(), "keyword".to_string(),
+ "features".to_string(),
+ "modified".to_string(),
+ "publisher".to_string(),
+ "keyword".to_string(),
"theme".to_string(),
];
- let actual_facets_categories: Vec<_> = facets.facets
- .into_iter()
- .map(|facet| facet.name)
- .collect();
+ let actual_facets_categories: Vec<_> =
+ facets.facets.into_iter().map(|facet| facet.name).collect();
assert_eq!(expected_facets_categories, actual_facets_categories);
}
#[tokio::test]
-async fn test_get_dataset_records () {
- let _mock = prepare_mock("/catalog/datasets/controle_techn/records");
+async fn test_get_dataset_records() {
+ let server = prepare_mock("/catalog/datasets/controle_techn/records").await;
- let results = ExploreApiEndPoint::new(&MOCK_URL)
+ let results = ExploreApiEndPoint::new(&server.url())
.get_dataset_records(TEST_DATASET_WITH_RECORDS_ID)
.await;
@@ -110,7 +139,10 @@
ResultsRecord::Aggregation(_) => unreachable!(),
ResultsRecord::Record(record) => record.clone(),
};
- assert_eq!("b839362b229db63bc9b344e980ae6273be7f80fd", record.record.id.as_str());
+ assert_eq!(
+ "b839362b229db63bc9b344e980ae6273be7f80fd",
+ record.record.id.as_str()
+ );
assert_eq!(
Some(&json!("Voiture Particulière")),
record.record.fields.get("cat_vehicule_libelle")
@@ -122,10 +154,10 @@
}
#[tokio::test]
-async fn test_get_dataset_information () {
- let _mock = prepare_mock("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits");
+async fn test_get_dataset_information() {
+ let server = prepare_mock("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits").await;
- let dataset = ExploreApiEndPoint::new(&MOCK_URL)
+ let dataset = ExploreApiEndPoint::new(&server.url())
.get_dataset_information(TEST_DATASET_ID)
.await;
@@ -133,21 +165,26 @@
}
#[tokio::test]
-async fn test_get_dataset_attachments () {
- let _mock = prepare_mock("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/attachments");
+async fn test_get_dataset_attachments() {
+ let server =
+ prepare_mock("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/attachments").await;
- let attachments = ExploreApiEndPoint::new(&MOCK_URL)
+ let attachments = ExploreApiEndPoint::new(&server.url())
.get_dataset_attachments(TEST_DATASET_ID)
.await;
- assert!(attachments.attachments[0].metas.url.starts_with("odsfile://"));
+ assert!(attachments.attachments[0]
+ .metas
+ .url
+ .starts_with("odsfile://"));
}
#[tokio::test]
-async fn test_get_dataset_facets () {
- let _mock = prepare_mock("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/facets");
+async fn test_get_dataset_facets() {
+ let server =
+ prepare_mock("/catalog/datasets/fichier-fantoir-des-voies-et-lieux-dits/facets").await;
- let facets = ExploreApiEndPoint::new(&MOCK_URL)
+ let facets = ExploreApiEndPoint::new(&server.url())
.get_dataset_facets(TEST_DATASET_ID)
.await;
@@ -155,10 +192,13 @@
}
#[tokio::test]
-async fn test_get_dataset_record () {
- let _mock = prepare_mock("/catalog/datasets/controle_techn/records/eb04cba18e872814448a7fda829f3f1918cfae0b");
+async fn test_get_dataset_record() {
+ let server = prepare_mock(
+ "/catalog/datasets/controle_techn/records/eb04cba18e872814448a7fda829f3f1918cfae0b",
+ )
+ .await;
- let record = ExploreApiEndPoint::new(&MOCK_URL)
+ let record = ExploreApiEndPoint::new(&server.url())
.get_dataset_record(TEST_DATASET_WITH_RECORDS_ID, TEST_RECORD_ID)
.await;
diff --git a/rfc-datasource/Cargo.toml b/rfc-datasource/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "rfc-datasource"
+version = "0.1.0"
+edition = "2021"
+description = "Downloads and transforms RFC index"
+authors = [
+ "Sébastien Santoro <dereckson@espace-win.org>"
+]
+license = "BSD-2-Clause"
+
+[dependencies]
+lazy_static = "1.4.0"
+regex = "1.8.1"
+
+[dependencies.clap]
+version = "4.3.0"
+features = ["derive"]
+
+[dependencies.reqwest]
+version = "~0.11.18"
+
+[dependencies.tokio]
+version = "1.28.1"
+features = ["full"]
diff --git a/rfc-datasource/README.md b/rfc-datasource/README.md
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/README.md
@@ -0,0 +1,39 @@
+The `rfc-datasource` utility allows you to download the RFC index, parse it,
+and transform the output.
+
+It has been designed to output the index in an arbitrary format,
+so we can export a Darkbot database for Odderon, one of our IRC bots.
+
+## Usage
+
+`rfc-datasource --format <format string> [--source /path/to/rfc-index.txt]`
+
+The format string can be arbitrary text or variables:
+
+| **Variable** | **Description** |
+|-----------------|---------------------------------------------------------------|
+| %%id%% | The number of the RFC without leading 0 |
+| %%<len>id%%     | The number of the RFC with leading 0s to fill <len> digits (1) |
+| %%description%% | The RFC title, authors and date |
+| %%status%% | The RFC status (2) |
+| %%fullstatus%% | A string summarizing the different status notes (3) |
+
+Examples for the variables:
+ - (1) e.g. `%%4id%%` will output `0065` for the RFC 65
+ - (2) e.g. `INFORMATIONAL` for RFC 2286
+ - (3) e.g. `Obsoletes RFC1938. Status: DRAFT STANDARD.` for RFC 2289
+
+The utility uses as source, by order of priority:
+ - the path specified to the `--source` argument
+ - any `rfc-index.txt` file available in the current directory
+ - https://www.ietf.org/download/rfc-index.txt
+
+## Recipes
+
+### Darkbot database
+
+ rfc-datasource --format "rfc+%%id%% %%description%% %%fullstatus%%"
+
+### CSV export
+
+ rfc-datasource --format '%%id%%,"%%description%%", "%%status%%"'
diff --git a/rfc-datasource/src/main.rs b/rfc-datasource/src/main.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/main.rs
@@ -0,0 +1,32 @@
+use clap::Parser;
+
+use crate::rfc_index::get_rfc_index;
+use crate::rfc_parser::Rfc;
+
+mod rfc_index;
+mod rfc_parser;
+
+#[derive(Debug, Parser)]
+#[command(name = "rfc-datasource")]
+#[clap(author="Nasqueron project", version, about="Download and print RFC index", long_about=None)]
+pub struct RfcArgs {
+ /// The format string to use
+ #[arg(long, short = 'f')]
+ format: String,
+
+ /// The path to the RFC index source
+ #[arg(long, short = 's')]
+ source: Option<String>,
+}
+
+#[tokio::main]
+async fn main() {
+    let args = RfcArgs::parse(); // Exits if an argument is missing or --help/--version is provided.
+
+ let document = get_rfc_index(args.source).await
+ .expect("Can't read or fetch RFC index");
+
+ for rfc in Rfc::parse_document(&document) {
+ println!("{}", rfc.format(&args.format));
+ }
+}
diff --git a/rfc-datasource/src/rfc_index.rs b/rfc-datasource/src/rfc_index.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/rfc_index.rs
@@ -0,0 +1,31 @@
+use std::error::Error;
+use std::fs;
+use std::path::Path;
+
+static RFC_INDEX_URL: &str = "https://www.ietf.org/download/rfc-index.txt";
+
+pub async fn get_rfc_index(source: Option<String>) -> Result<String, Box<dyn Error>> {
+ match source {
+ // Case 1 - A source file has been explicitly set
+        Some(file) => Ok(fs::read_to_string(&file)?),
+
+ None => {
+ if Path::new("rfc-index.txt").exists() {
+ // Case 2 - The file rfc-index.txt can be found locally
+ Ok(fs::read_to_string("rfc-index.txt")?.parse()?)
+ } else {
+ // Case 3 - Fetch the index remotely
+ Ok(fetch_rfc_index().await?)
+ }
+ }
+ }
+}
+
+async fn fetch_rfc_index() -> Result<String, Box<dyn Error>> {
+ let body = reqwest::get(RFC_INDEX_URL)
+ .await?
+ .text()
+ .await?;
+
+ Ok(body)
+}
diff --git a/rfc-datasource/src/rfc_parser.rs b/rfc-datasource/src/rfc_parser.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/rfc_parser.rs
@@ -0,0 +1,214 @@
+use std::collections::HashMap;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+/* -------------------------------------------------------------
+ Regexp definitions, used in parser and builder
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static!{
+ static ref RE_RFC: Regex = Regex::new(
+ // <id> <description> <metadata...>
+ r"(\d+) (.*?) (\(.*\))"
+ ).unwrap();
+
+ static ref RE_RFC_METADATA: Regex = Regex::new(
+ // (...) (...) (...)
+ r"\((.*?)\)"
+ ).unwrap();
+
+ static ref RE_ID: Regex = Regex::new(
+ // %%9id%%
+ r"\%(\d+)id\%"
+ ).unwrap();
+}
+
+/* -------------------------------------------------------------
+ RFC
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Debug)]
+pub struct Rfc {
+ pub id: i32,
+ pub description: String,
+
+ pub metadata: HashMap<String, String>,
+ pub untagged_metadata: Vec<String>,
+}
+
+impl Rfc {
+
+ ///
+ /// Parser
+ ///
+
+ pub fn parse_document(document: &str) -> Vec<Self> {
+ let lines: Vec<_> = document.lines().collect();
+
+ let start_index = lines
+ .iter()
+ .position(|&line| line.starts_with("0001"))
+ .unwrap_or(0);
+
+ let document = lines[start_index..].join("\n");
+
+ Self::parse_blocks(&document)
+ }
+
+    fn parse_blocks(document: &str) -> Vec<Self> {
+        document
+            .split("\n\n")
+            .filter_map(Self::parse_block)
+            .collect()
+    }
+
+ pub fn parse_block(block: &str) -> Option<Self> {
+ let rfc_expression: Vec<&str> = block
+ .split("\n")
+ .map(|line| line.trim_start())
+ .collect();
+
+ Self::parse_line(&rfc_expression.join(" "))
+ }
+
+ fn parse_line(line: &str) -> Option<Self> {
+ match RE_RFC.captures(line) {
+ None => None,
+
+ Some(caps) => {
+ match caps.len() {
+ 4 => {
+ let (metadata, untagged_metadata) = Self::parse_metadata_line(
+ caps.get(3)?.as_str()
+ );
+
+ Some(Rfc {
+ id: caps.get(1)?.as_str().parse::<i32>().ok()?,
+ description: caps.get(2)?.as_str().to_string(),
+ metadata,
+ untagged_metadata,
+ })
+ },
+ _ => None,
+ }
+ }
+ }
+ }
+
+ fn parse_metadata_line(expression: &str) -> (HashMap<String, String>, Vec<String>) {
+ let mut metadata = HashMap::new();
+ let mut untagged_metadata = Vec::new();
+
+ RE_RFC_METADATA
+ .captures_iter(expression)
+ .map(|cap| cap.get(1).unwrap().as_str())
+ .for_each(|value| {
+ if value.contains(":") {
+ let parts: Vec<_> = value.splitn(2, ": ").collect(); // K: V
+ metadata.insert(parts[0].to_owned(), parts[1].to_owned());
+ } else {
+ untagged_metadata.push(String::from(value));
+ }
+ });
+
+ (metadata, untagged_metadata)
+ }
+
+ ///
+ /// Builder
+ ///
+
+    pub fn get_status(&self) -> Option<String> {
+ self.metadata
+ .get("Status")
+ .map(|value| String::from(value))
+ }
+
+    pub fn get_full_status_metadata(&self) -> Vec<String> {
+ let mut all_metadata: Vec<String> = self.untagged_metadata
+ .iter()
+ .map(|value| format!("{}.", value))
+ .collect();
+
+ all_metadata.extend(
+ self.metadata
+ .iter()
+ .filter(|&(key, _value)| key != "DOI" && key != "Format")
+ .map(|(key, value)| format!("{}: {}.", key, value))
+ );
+
+ all_metadata
+ }
+
+    pub fn get_full_status(&self) -> String {
+ self.get_full_status_metadata()
+ .join(" ")
+ }
+
+ ///
+ /// Format
+ ///
+
+ pub fn format(&self, format: &str) -> String {
+ // Replace expressions like %%4id%% %%5id%%
+ let matches = RE_ID
+ .captures_iter(&format)
+ .map(|caps| caps.get(1).unwrap()
+ .as_str()
+ .parse::<usize>().unwrap());
+
+ let mut formatted_rfc = String::from(format);
+        for len in matches {
+            formatted_rfc = formatted_rfc.replace(
+                &format!("%%{}id%%", len),
+                &zerofill(self.id, len),
+            );
+        }
+
+ // Replace straightforward variables
+ formatted_rfc
+ .replace("%%id%%", &self.id.to_string())
+ .replace("%%description%%", &self.description)
+ .replace("%%status%%", &self.get_status().unwrap_or(String::new()))
+ .replace("%%fullstatus%%", &self.get_full_status())
+ }
+}
+
+/* -------------------------------------------------------------
+ Helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+fn zerofill(number: i32, width: usize) -> String {
+ format!("{:0>width$}", number, width = width)
+}
+
+/* -------------------------------------------------------------
+ Unit tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+    pub fn test_zerofill() {
+ // Test case 1: number is smaller than width (usual case)
+ assert_eq!(zerofill(42, 5), "00042");
+
+ // Test case 2: number is equal to width
+ assert_eq!(zerofill(12345, 5), "12345");
+
+ // Test case 3: number is larger than width
+ assert_eq!(zerofill(987654, 4), "987654");
+
+ // Test case 4: number is zero
+ assert_eq!(zerofill(0, 3), "000");
+
+ // Test case 5: width is zero
+ assert_eq!(zerofill(987, 0), "987");
+ }
+
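+    #[test]
+    pub fn test_parse_block() {
+        // A sketch of an index entry, in the style of rfc-index.txt; the
+        // status matches the README example for RFC 2286.
+        let block = "2286 Test Cases for HMAC-RIPEMD160 and HMAC-RIPEMD128. J. Kapp.\n     February 1998. (Format: TXT) (Status: INFORMATIONAL)";
+
+        let rfc = Rfc::parse_block(block).expect("The block should parse as a RFC entry");
+
+        assert_eq!(2286, rfc.id);
+        assert_eq!(Some("INFORMATIONAL".to_string()), rfc.get_status());
+        assert_eq!("0002286", rfc.format("%%7id%%"));
+    }
+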
+}
