Page MenuHomeDevCentral

D3112.diff
No OneTemporary

D3112.diff

diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,5 +2,6 @@
members = [
"fantoir-datasource",
+ "rfc-datasource",
"opendatasoft-explore-api",
]
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -14,6 +14,14 @@
More information: [fantoir-datasource README](fantoir-datasource/README.md)
+### RFC import tool (rfc-datasource)
+
+Import RFC index and convert it to the specified text-based format.
+
+Can be used to refresh RFC Darkbot database for IRC bots.
+
+More information: [rfc-datasource README](rfc-datasource/README.md)
+
### Opendatasoft Explore API client (opendatasoft-explore-api)
The opendatasoft-explore-api crate allows to query the Opendatasoft Explore API from Rust code.
diff --git a/rfc-datasource/Cargo.toml b/rfc-datasource/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "rfc-datasource"
+version = "0.1.0"
+edition = "2021"
+description = "Downloads and transforms RFC index"
+authors = [
+ "Sébastien Santoro <dereckson@espace-win.org>"
+]
+license = "BSD-2-Clause"
+
+[dependencies]
+lazy_static = "1.4.0"
+regex = "1.8.1"
+
+[dependencies.clap]
+version = "4.3.0"
+features = ["derive"]
+
+[dependencies.reqwest]
+version = "~0.11.18"
+
+[dependencies.tokio]
+version = "1.28.1"
+features = ["full"]
diff --git a/rfc-datasource/README.md b/rfc-datasource/README.md
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/README.md
@@ -0,0 +1,39 @@
+The `rfc-datasource` utility downloads the RFC index, parses it,
+and transforms the output.
+
+It has been designed to output the index in an arbitrary text-based format,
+so we can export a Darkbot database for Odderon, one of our IRC bots.
+
+## Usage
+
+`rfc-datasource --format <format string> [--source /path/to/rfc-index.txt]`
+
+The format string can mix arbitrary text with the following variables:
+
+| **Variable** | **Description** |
+|-----------------|---------------------------------------------------------------|
+| %%id%% | The number of the RFC without leading 0 |
+| %%<len>id%% | The number of the RFC with leading 0 to fill <len> digits (1) |
+| %%description%% | The RFC title, authors and date |
+| %%status%% | The RFC status (2) |
+| %%fullstatus%% | A string summarizing the different status notes (3) |
+
+Examples for the variables:
+ - (1) e.g. `%%4id%%` will output `0065` for the RFC 65
+ - (2) e.g. `INFORMATIONAL` for RFC 2286
+ - (3) e.g. `Obsoletes RFC1938. Status: DRAFT STANDARD.` for RFC 2289
+
+The utility uses as source, in order of priority:
+ - the path specified to the --source argument
+ - any `rfc-index.txt` file available in the current directory
+ - https://www.ietf.org/download/rfc-index.txt
+
+## Recipes
+
+### Darkbot database
+
+ rfc-datasource --format "rfc+%%id%% %%description%% %%fullstatus%%"
+
+### CSV export
+
+ rfc-datasource --format '%%id%%,"%%description%%", "%%status%%"'
diff --git a/rfc-datasource/src/main.rs b/rfc-datasource/src/main.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/main.rs
@@ -0,0 +1,32 @@
+use clap::Parser;
+
+use crate::rfc_index::get_rfc_index;
+use crate::rfc_parser::Rfc;
+
+mod rfc_index;
+mod rfc_parser;
+
+// Command-line arguments of the rfc-datasource binary.
+// NOTE(review): written as a plain comment on purpose; clap's derive is
+// understood to read `///` comments as help text — confirm before converting.
+#[derive(Debug, Parser)]
+#[command(
+    name = "rfc-datasource",
+    author = "Nasqueron project",
+    version,
+    about = "Download and print RFC index",
+    long_about = None
+)]
+pub struct RfcArgs {
+    /// The format string to use
+    #[arg(short = 'f', long)]
+    format: String,
+
+    /// The path to the RFC index source
+    #[arg(short = 's', long)]
+    source: Option<String>,
+}
+
+/// Entry point: parses the command line, loads the RFC index and prints
+/// every parsed RFC on stdout, one per line, using the requested format string.
+///
+/// When the index can't be read or fetched, the error is reported on stderr
+/// and the process exits with status 1 instead of panicking.
+#[tokio::main]
+async fn main() {
+    let args = RfcArgs::parse(); // Will exit if argument is missing or --help/--version provided.
+
+    let document = match get_rfc_index(args.source).await {
+        Ok(document) => document,
+        Err(error) => {
+            // Reading a file or querying the network can legitimately fail:
+            // report the cause cleanly rather than through a panic message.
+            eprintln!("Can't read or fetch RFC index: {}", error);
+            std::process::exit(1);
+        }
+    };
+
+    for rfc in Rfc::parse_document(&document) {
+        println!("{}", rfc.format(&args.format));
+    }
+}
diff --git a/rfc-datasource/src/rfc_index.rs b/rfc-datasource/src/rfc_index.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/rfc_index.rs
@@ -0,0 +1,31 @@
+use std::error::Error;
+use std::fs;
+use std::path::Path;
+
+// URL the RFC index is downloaded from when no local copy is used.
+static RFC_INDEX_URL: &str = "https://www.ietf.org/download/rfc-index.txt";
+
+/// Returns the content of the RFC index.
+///
+/// The source is picked in order of priority:
+///   1. the file at `source`, when a path has been explicitly given;
+///   2. the `rfc-index.txt` file of the current directory, when it exists;
+///   3. the remote index, fetched by `fetch_rfc_index`.
+///
+/// # Errors
+///
+/// Returns an error when the selected file can't be read,
+/// or when fetching the remote index fails.
+pub async fn get_rfc_index(source: Option<String>) -> Result<String, Box<dyn Error>> {
+    // read_to_string already yields an owned String: parsing it again
+    // into a String would only copy the whole index a second time.
+    match source {
+        // Case 1 - A source file has been explicitly set
+        Some(file) => Ok(fs::read_to_string(file)?),
+
+        // Case 2 - The file rfc-index.txt can be found locally
+        None if Path::new("rfc-index.txt").exists() => Ok(fs::read_to_string("rfc-index.txt")?),
+
+        // Case 3 - Fetch the index remotely
+        None => fetch_rfc_index().await,
+    }
+}
+
+/// Downloads the RFC index from `RFC_INDEX_URL` and returns the response body.
+///
+/// # Errors
+///
+/// Returns an error when the request fails, when the server answers with
+/// an HTTP error status (4xx/5xx), or when the body can't be read as text.
+async fn fetch_rfc_index() -> Result<String, Box<dyn Error>> {
+    let body = reqwest::get(RFC_INDEX_URL)
+        .await?
+        // Without this check, an error page would be returned as if it
+        // were the index, and then silently parsed into zero RFC.
+        .error_for_status()?
+        .text()
+        .await?;
+
+    Ok(body)
+}
diff --git a/rfc-datasource/src/rfc_parser.rs b/rfc-datasource/src/rfc_parser.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/rfc_parser.rs
@@ -0,0 +1,214 @@
+use std::collections::HashMap;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+/* -------------------------------------------------------------
+ Regexp definitions, used in parser and builder
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static!{
+ // Matches one RFC entry, once the lines of its block have been joined
+ // with spaces. Capture groups:
+ // 1 = the RFC number (digits)
+ // 2 = the description (shortest possible match)
+ // 3 = the parenthesised metadata, from the opening parenthesis
+ // ending the description up to the last closing parenthesis
+ static ref RE_RFC: Regex = Regex::new(
+ // <id> <description> <metadata...>
+ r"(\d+) (.*?) (\(.*\))"
+ ).unwrap();
+
+ // Matches each parenthesised group of a metadata expression.
+ // Capture group 1 is the text between the parentheses (shortest match).
+ static ref RE_RFC_METADATA: Regex = Regex::new(
+ // (...) (...) (...)
+ r"\((.*?)\)"
+ ).unwrap();
+
+ // Matches zero-filled id placeholders of a format string.
+ // Capture group 1 is the requested width, as digits.
+ // The pattern itself only requires one `%` on each side of `<digits>id`,
+ // so it matches the inner part of a `%%<digits>id%%` placeholder.
+ static ref RE_ID: Regex = Regex::new(
+ // %%9id%%
+ r"\%(\d+)id\%"
+ ).unwrap();
+}
+
+/* -------------------------------------------------------------
+ RFC
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+// One entry of the RFC index, as produced by the parser.
+#[derive(Debug)]
+pub struct Rfc {
+ // The RFC number, parsed from the digits starting the entry
+ pub id: i32,
+ // The text between the number and the parenthesised metadata
+ pub description: String,
+
+ // Metadata written as "(Key: Value)", indexed by key
+ pub metadata: HashMap<String, String>,
+ // Metadata without a "Key: Value" shape, kept verbatim, in order of appearance
+ pub untagged_metadata: Vec<String>,
+}
+
+impl Rfc {
+
+    //
+    // Parser
+    //
+
+    /// Parses a whole RFC index document into a list of RFC entries.
+    ///
+    /// Everything before the first line starting with `0001` is skipped;
+    /// when there is no such line, the document is parsed from its beginning.
+    /// Blocks that can't be parsed as an RFC entry are ignored.
+    pub fn parse_document(document: &str) -> Vec<Self> {
+        let lines: Vec<_> = document.lines().collect();
+
+        let start_index = lines
+            .iter()
+            .position(|line| line.starts_with("0001"))
+            .unwrap_or(0);
+
+        let document = lines[start_index..].join("\n");
+
+        Self::parse_blocks(&document)
+    }
+
+    /// Splits the document into blocks separated by an empty line,
+    /// and keeps the blocks successfully parsed as an RFC entry.
+    fn parse_blocks(document: &str) -> Vec<Self> {
+        document
+            .split("\n\n")
+            .filter_map(Self::parse_block)
+            .collect()
+    }
+
+    /// Parses one block, i.e. the lines describing one RFC entry.
+    ///
+    /// Leading whitespace of each line is dropped, then the lines are joined
+    /// with a space, so the entry can be matched as a single line.
+    ///
+    /// Returns `None` when the block isn't a well-formed RFC entry.
+    pub fn parse_block(block: &str) -> Option<Self> {
+        let rfc_expression: Vec<&str> = block
+            .split('\n')
+            .map(|line| line.trim_start())
+            .collect();
+
+        Self::parse_line(&rfc_expression.join(" "))
+    }
+
+    /// Parses a `<id> <description> <metadata...>` expression.
+    ///
+    /// Returns `None` when the line doesn't match `RE_RFC`, or when the id
+    /// doesn't fit in an `i32`.
+    fn parse_line(line: &str) -> Option<Self> {
+        let caps = RE_RFC.captures(line)?;
+
+        let (metadata, untagged_metadata) = Self::parse_metadata_line(
+            caps.get(3)?.as_str()
+        );
+
+        Some(Rfc {
+            id: caps.get(1)?.as_str().parse::<i32>().ok()?,
+            description: caps.get(2)?.as_str().to_string(),
+            metadata,
+            untagged_metadata,
+        })
+    }
+
+    /// Parses a `(...) (...) (...)` metadata expression.
+    ///
+    /// Returns the `Key: Value` groups as a map, and every other group,
+    /// verbatim, as a list of untagged metadata.
+    fn parse_metadata_line(expression: &str) -> (HashMap<String, String>, Vec<String>) {
+        let mut metadata = HashMap::new();
+        let mut untagged_metadata = Vec::new();
+
+        let values = RE_RFC_METADATA
+            .captures_iter(expression)
+            .filter_map(|cap| cap.get(1))
+            .map(|value| value.as_str());
+
+        for value in values {
+            // Only a ": " separator makes a K: V pair. A value with a colon
+            // but without that separator is kept as untagged metadata,
+            // instead of being indexed past the end of the split result.
+            match value.split_once(": ") {
+                Some((key, content)) => {
+                    metadata.insert(key.to_owned(), content.to_owned());
+                },
+                None => untagged_metadata.push(String::from(value)),
+            }
+        }
+
+        (metadata, untagged_metadata)
+    }
+
+    //
+    // Builder
+    //
+
+    /// Gets the value of the `Status` metadata, if any.
+    pub fn get_status(&self) -> Option<String> {
+        self.metadata
+            .get("Status")
+            .cloned()
+    }
+
+    /// Gets the status notes as a list of sentences, each ending with a dot.
+    ///
+    /// Untagged metadata come first, in their order of appearance, followed
+    /// by the `Key: Value` metadata sorted by key. `DOI` and `Format` are omitted.
+    pub fn get_full_status_metadata(&self) -> Vec<String> {
+        let mut all_metadata: Vec<String> = self.untagged_metadata
+            .iter()
+            .map(|value| format!("{}.", value))
+            .collect();
+
+        // The iteration order of a HashMap isn't specified: sort the entries,
+        // so the same index always gives the same output.
+        let mut tagged_metadata: Vec<(&String, &String)> = self.metadata
+            .iter()
+            .filter(|(key, _value)| key.as_str() != "DOI" && key.as_str() != "Format")
+            .collect();
+        tagged_metadata.sort();
+
+        all_metadata.extend(
+            tagged_metadata
+                .into_iter()
+                .map(|(key, value)| format!("{}: {}.", key, value))
+        );
+
+        all_metadata
+    }
+
+    /// Gets the status notes as one string, sentences separated by a space.
+    pub fn get_full_status(&self) -> String {
+        self.get_full_status_metadata()
+            .join(" ")
+    }
+
+    //
+    // Format
+    //
+
+    /// Renders the RFC according to a format string.
+    ///
+    /// The following variables are replaced, any other text is kept as is:
+    ///   - `%%<len>id%%`: the id, with leading zeros to fill `<len>` digits
+    ///   - `%%id%%`: the id
+    ///   - `%%description%%`: the description
+    ///   - `%%status%%`: the status, or an empty string when there is none
+    ///   - `%%fullstatus%%`: the string given by `get_full_status`
+    pub fn format(&self, format: &str) -> String {
+        let mut formatted_rfc = String::from(format);
+
+        // Replace expressions like %%4id%% %%5id%%
+        for caps in RE_ID.captures_iter(format) {
+            let digits = match caps.get(1) {
+                Some(digits) => digits.as_str(),
+                None => continue,
+            };
+
+            // A width too large for usize is left unreplaced instead of panicking.
+            let len = match digits.parse::<usize>() {
+                Ok(len) => len,
+                Err(_) => continue,
+            };
+
+            // The placeholder is rebuilt from the digits as written, so a
+            // width with leading zeros like %%04id%% is replaced too.
+            formatted_rfc = formatted_rfc.replace(
+                &format!("%%{}id%%", digits),
+                &zerofill(self.id, len),
+            );
+        }
+
+        // Replace straightforward variables
+        formatted_rfc
+            .replace("%%id%%", &self.id.to_string())
+            .replace("%%description%%", &self.description)
+            .replace("%%status%%", &self.get_status().unwrap_or_default())
+            .replace("%%fullstatus%%", &self.get_full_status())
+    }
+}
+
+/* -------------------------------------------------------------
+ Helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Formats a number in decimal, with leading zeros to reach `width` characters.
+///
+/// A number already as wide as `width`, or wider, is returned without padding.
+fn zerofill(number: i32, width: usize) -> String {
+    // The `0` flag pads after the sign (-0042), where a `0` fill character
+    // would pad before it (00-42). Both agree for non-negative numbers.
+    format!("{:0width$}", number, width = width)
+}
+
+/* -------------------------------------------------------------
+ Unit tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Checks zerofill against a table of (number, width, expected) cases.
+    #[test]
+    pub fn test_zerofill () {
+        let cases = [
+            // number is smaller than width (usual case)
+            (42, 5, "00042"),
+            // number is equal to width
+            (12345, 5, "12345"),
+            // number is larger than width
+            (987654, 4, "987654"),
+            // number is zero
+            (0, 3, "000"),
+            // width is zero
+            (987, 0, "987"),
+        ];
+
+        for (number, width, expected) in cases {
+            assert_eq!(zerofill(number, width), expected);
+        }
+    }
+
+}

File Metadata

Mime Type
text/plain
Expires
Sat, Oct 26, 13:06 (21 h, 35 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2216086
Default Alt Text
D3112.diff (11 KB)

Event Timeline