Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F7108164
D3112.id8050.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Referenced Files
None
Subscribers
None
D3112.id8050.diff
View Options
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,5 +2,6 @@
members = [
"fantoir-datasource",
+ "rfc-datasource",
"opendatasoft-explore-api",
]
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -14,6 +14,14 @@
More information: [fantoir-datasource README](fantoir-datasource/README.md)
+### RFC import tool (rfc-datasource)
+
+Import RFC index and convert it to the specified text-based format.
+
+Can be used to refresh RFC Darkbot database for IRC bots.
+
+More information: [rfc-datasource README](rfc-datasource/README.md)
+
### Opendatasoft Explore API client (opendatasoft-explore-api)
The opendatasoft-explore-api crate allows to query the Opendatasoft Explore API from Rust code.
diff --git a/rfc-datasource/Cargo.toml b/rfc-datasource/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "rfc-datasource"
+version = "0.1.0"
+edition = "2021"
+description = "Downloads and transforms RFC index"
+authors = [
+ "Sébastien Santoro <dereckson@espace-win.org>"
+]
+license = "BSD-2-Clause"
+
+[dependencies]
+lazy_static = "1.4.0"
+regex = "1.8.1"
+
+[dependencies.clap]
+version = "4.3.0"
+features = ["derive"]
+
+[dependencies.reqwest]
+version = "~0.11.18"
+
+[dependencies.tokio]
+version = "1.28.1"
+features = ["full"]
diff --git a/rfc-datasource/README.md b/rfc-datasource/README.md
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/README.md
@@ -0,0 +1,39 @@
+The `rfc-datasource` utility downloads the RFC index, parses it,
+and transforms the output.
+
+It has been designed to output the index in an arbitrary text format,
+so we can export a Darkbot database for Odderon, one of our IRC bots.
+
+## Usage
+
+`rfc-datasource --format <format string> [--source /path/to/rfc-index.txt]`
+
+The format string can be arbitrary text or variables:
+
+| **Variable** | **Description** |
+|-----------------|---------------------------------------------------------------|
+| %%id%% | The number of the RFC without leading 0 |
+| %%<len>id%% | The number of the RFC with leading 0 to fill <len> digits (1) |
+| %%description%% | The RFC title, authors and date |
+| %%status%% | The RFC status (2) |
+| %%fullstatus%% | A string summarizing the different status notes (3) |
+
+Examples for the variables:
+ - (1) e.g. `%%4id%%` will output `0065` for the RFC 65
+ - (2) e.g. `INFORMATIONAL` for RFC 2286
+ - (3) e.g. `Obsoletes RFC1938. Status: DRAFT STANDARD.` for RFC 2289
+
+The utility reads the index from the first available source, in order of priority:
+ - the path passed to the `--source` argument
+ - any `rfc-index.txt` file available in the current directory
+ - https://www.ietf.org/download/rfc-index.txt
+
+## Recipes
+
+### Darkbot database
+
+ rfc-datasource --format "rfc+%%id%% %%description%% %%fullstatus%%"
+
+### CSV export
+
+ rfc-datasource --format '%%id%%,"%%description%%","%%status%%"'
diff --git a/rfc-datasource/src/main.rs b/rfc-datasource/src/main.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/main.rs
@@ -0,0 +1,32 @@
+use clap::Parser;
+
+use crate::rfc_index::get_rfc_index;
+use crate::rfc_parser::Rfc;
+
+mod rfc_index;
+mod rfc_parser;
+
+// Command-line arguments of the utility, parsed through clap's derive API.
+//
+// The `///` comments on the fields are picked up by clap as the help text
+// of each option, so they are part of the program output.
+#[derive(Debug, Parser)]
+#[command(
+    name = "rfc-datasource",
+    author = "Nasqueron project",
+    version,
+    about = "Download and print RFC index",
+    long_about = None
+)]
+pub struct RfcArgs {
+    /// The format string to use
+    #[arg(long, short = 'f')]
+    format: String,
+
+    /// The path to the RFC index source
+    #[arg(long, short = 's')]
+    source: Option<String>,
+}
+
+/// Entry point: parses the arguments, loads the RFC index, then prints one
+/// formatted line per parsed RFC entry on the standard output.
+///
+/// When the index can't be read or fetched, the error is reported on stderr
+/// and the process exits with status 1.
+#[tokio::main]
+async fn main() {
+    use std::io::Write;
+
+    let args = RfcArgs::parse(); // Will exit if argument is missing or --help/--version provided.
+
+    let document = match get_rfc_index(args.source).await {
+        Ok(document) => document,
+        Err(error) => {
+            eprintln!("Can't read or fetch RFC index: {}", error);
+            std::process::exit(1);
+        }
+    };
+
+    // Lock stdout once for the whole output instead of once per line, and stop
+    // quietly when the reader goes away (e.g. `rfc-datasource ... | head`),
+    // a case where println! would panic.
+    let stdout = std::io::stdout();
+    let mut output = stdout.lock();
+
+    for rfc in Rfc::parse_document(&document) {
+        if writeln!(output, "{}", rfc.format(&args.format)).is_err() {
+            break;
+        }
+    }
+}
diff --git a/rfc-datasource/src/rfc_index.rs b/rfc-datasource/src/rfc_index.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/rfc_index.rs
@@ -0,0 +1,31 @@
+use std::error::Error;
+use std::fs;
+use std::path::Path;
+
+static RFC_INDEX_URL: &str = "https://www.ietf.org/download/rfc-index.txt";
+
+/// Loads the RFC index as text.
+///
+/// Sources, by order of priority:
+///   1. the file at `source`, when a path has been explicitly given
+///   2. a `rfc-index.txt` file, resolved relative to the current directory
+///   3. the remote index at `RFC_INDEX_URL`
+///
+/// # Errors
+///
+/// Returns the I/O error when the selected file can't be read, or the error
+/// of the remote fetch when the index has to be downloaded and that fails.
+pub async fn get_rfc_index(source: Option<String>) -> Result<String, Box<dyn Error>> {
+    // fs::read_to_string already yields a String: no extra parse step is needed.
+    match source {
+        // Case 1 - A source file has been explicitly set
+        Some(file) => Ok(fs::read_to_string(file)?),
+
+        // Case 2 - The file rfc-index.txt can be found locally
+        None if Path::new("rfc-index.txt").exists() => Ok(fs::read_to_string("rfc-index.txt")?),
+
+        // Case 3 - Fetch the index remotely
+        None => fetch_rfc_index().await,
+    }
+}
+
+/// Downloads the RFC index from `RFC_INDEX_URL` and returns the response body.
+///
+/// # Errors
+///
+/// Fails when the request can't be performed, when the server answers with
+/// an HTTP error status (4xx or 5xx), or when the body can't be read as text.
+async fn fetch_rfc_index() -> Result<String, Box<dyn Error>> {
+    let body = reqwest::get(RFC_INDEX_URL)
+        .await?
+        // Without this check, an error page would be returned as if it were the index.
+        .error_for_status()?
+        .text()
+        .await?;
+
+    Ok(body)
+}
diff --git a/rfc-datasource/src/rfc_parser.rs b/rfc-datasource/src/rfc_parser.rs
new file mode 100644
--- /dev/null
+++ b/rfc-datasource/src/rfc_parser.rs
@@ -0,0 +1,214 @@
+use std::collections::HashMap;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+
+/* -------------------------------------------------------------
+ Regexp definitions, used in parser and builder
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+// Regular expressions compiled once, on first use, and shared by the parser
+// and the formatter. The patterns are literals, so the unwrap() calls only
+// fail if a pattern itself is invalid.
+lazy_static!{
+ // One RFC entry. Capture groups: 1 = the digits of the id,
+ // 2 = the description (non-greedy), 3 = the parenthesised metadata
+ // (greedy: it extends up to the last closing parenthesis).
+ static ref RE_RFC: Regex = Regex::new(
+ // <id> <description> <metadata...>
+ r"(\d+) (.*?) (\(.*\))"
+ ).unwrap();
+
+ // One parenthesised metadata item; group 1 is the text between the
+ // opening parenthesis and the next closing one.
+ static ref RE_RFC_METADATA: Regex = Regex::new(
+ // (...) (...) (...)
+ r"\((.*?)\)"
+ ).unwrap();
+
+ // Zero-padded id variable of a format string; group 1 is the width.
+ // Only one `%` is matched on each side of `<width>id`.
+ static ref RE_ID: Regex = Regex::new(
+ // %%9id%%
+ r"\%(\d+)id\%"
+ ).unwrap();
+}
+
+/* -------------------------------------------------------------
+ RFC
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// One entry of the RFC index, as extracted by the parser.
+#[derive(Debug)]
+pub struct Rfc {
+ /// Number of the RFC, parsed from the digits captured at the start of the entry.
+ pub id: i32,
+ /// Text captured between the id and the parenthesised metadata.
+ pub description: String,
+
+ /// Parenthesised metadata written as `Key: Value`, indexed by key.
+ pub metadata: HashMap<String, String>,
+ /// Parenthesised metadata that isn't a key/value pair, in order of appearance.
+ pub untagged_metadata: Vec<String>,
+}
+
+impl Rfc {
+
+    //
+    // Parser
+    //
+
+    /// Parses a whole RFC index document.
+    ///
+    /// Everything before the first line starting with `0001` is skipped; when
+    /// there is no such line, the whole document is parsed. Blocks that can't
+    /// be parsed as an RFC entry are ignored.
+    pub fn parse_document(document: &str) -> Vec<Self> {
+        let lines: Vec<_> = document.lines().collect();
+
+        let start_index = lines
+            .iter()
+            .position(|line| line.starts_with("0001"))
+            .unwrap_or(0);
+
+        // lines() strips the line terminators, so joining with "\n" gives
+        // text that can be split on "\n\n" whatever the original line endings.
+        let document = lines[start_index..].join("\n");
+
+        Self::parse_blocks(&document)
+    }
+
+    /// Parses every block of the document, the blocks being separated by an
+    /// empty line, and keeps the ones recognized as an RFC entry.
+    fn parse_blocks(document: &str) -> Vec<Self> {
+        document
+            .split("\n\n")
+            .filter_map(Self::parse_block)
+            .collect()
+    }
+
+    /// Parses one block of lines describing a single entry.
+    ///
+    /// The leading whitespace of each line is dropped, then the lines are
+    /// joined with a space to get a one-line expression.
+    ///
+    /// Returns `None` when the block doesn't match the entry pattern.
+    pub fn parse_block(block: &str) -> Option<Self> {
+        let rfc_expression: Vec<&str> = block
+            .split('\n')
+            .map(str::trim_start)
+            .collect();
+
+        Self::parse_line(&rfc_expression.join(" "))
+    }
+
+    /// Parses a one-line `<id> <description> <metadata...>` expression.
+    ///
+    /// Returns `None` when the line doesn't match, or when the id doesn't
+    /// fit in an `i32`.
+    fn parse_line(line: &str) -> Option<Self> {
+        let caps = RE_RFC.captures(line)?;
+
+        let (metadata, untagged_metadata) = Self::parse_metadata_line(
+            caps.get(3)?.as_str()
+        );
+
+        Some(Rfc {
+            id: caps.get(1)?.as_str().parse::<i32>().ok()?,
+            description: caps.get(2)?.as_str().to_string(),
+            metadata,
+            untagged_metadata,
+        })
+    }
+
+    /// Splits a `(...) (...) (...)` expression into tagged metadata, written
+    /// as `Key: Value`, and untagged metadata, which is everything else.
+    fn parse_metadata_line(expression: &str) -> (HashMap<String, String>, Vec<String>) {
+        let mut metadata = HashMap::new();
+        let mut untagged_metadata = Vec::new();
+
+        let values = RE_RFC_METADATA
+            .captures_iter(expression)
+            .filter_map(|cap| cap.get(1))
+            .map(|group| group.as_str());
+
+        for value in values {
+            // A value with a colon but without the ": " separator isn't
+            // a K: V pair, and is kept as untagged metadata.
+            match value.split_once(": ") {
+                Some((key, content)) => {
+                    metadata.insert(key.to_owned(), content.to_owned());
+                }
+                None => untagged_metadata.push(value.to_owned()),
+            }
+        }
+
+        (metadata, untagged_metadata)
+    }
+
+    //
+    // Builder
+    //
+
+    /// Returns the value of the `Status` metadata, if the entry has one.
+    pub fn get_status(&self) -> Option<String> {
+        self.metadata.get("Status").cloned()
+    }
+
+    /// Returns the status notes as sentences ending with a dot: first the
+    /// untagged metadata in order of appearance, then the tagged metadata
+    /// as `Key: Value.`, sorted by key. `DOI` and `Format` are left out.
+    pub fn get_full_status_metadata(&self) -> Vec<String> {
+        // HashMap iteration order isn't stable from one run to the next:
+        // sort the entries so the same index always gives the same output.
+        let mut tagged: Vec<(&String, &String)> = self.metadata
+            .iter()
+            .filter(|(key, _)| key.as_str() != "DOI" && key.as_str() != "Format")
+            .collect();
+        tagged.sort_unstable();
+
+        self.untagged_metadata
+            .iter()
+            .map(|value| format!("{}.", value))
+            .chain(
+                tagged
+                    .into_iter()
+                    .map(|(key, value)| format!("{}: {}.", key, value))
+            )
+            .collect()
+    }
+
+    /// Returns the status notes joined with spaces into one string.
+    pub fn get_full_status(&self) -> String {
+        self.get_full_status_metadata()
+            .join(" ")
+    }
+
+    //
+    // Format
+    //
+
+    /// Renders the entry according to a format string.
+    ///
+    /// Recognized variables: `%%id%%`, `%%<len>id%%` (id padded with zeros to
+    /// `<len>` digits), `%%description%%`, `%%status%%` (empty when the entry
+    /// has no status) and `%%fullstatus%%`. Any other text is kept as is.
+    pub fn format(&self, format: &str) -> String {
+        let mut formatted_rfc = String::from(format);
+
+        // Replace expressions like %%4id%% %%5id%%
+        for width_expression in RE_ID.captures_iter(format).filter_map(|caps| caps.get(1)) {
+            let digits = width_expression.as_str();
+
+            // A width too large for usize can't be honored: the variable
+            // is then left untouched instead of aborting the program.
+            if let Ok(width) = digits.parse::<usize>() {
+                // The variable is rebuilt from the digits as written, so
+                // a width with leading zeros like %%04id%% is found too.
+                formatted_rfc = formatted_rfc.replace(
+                    &format!("%%{}id%%", digits),
+                    &zerofill(self.id, width),
+                );
+            }
+        }
+
+        // Replace straightforward variables
+        formatted_rfc
+            .replace("%%id%%", &self.id.to_string())
+            .replace("%%description%%", &self.description)
+            .replace("%%status%%", &self.get_status().unwrap_or_default())
+            .replace("%%fullstatus%%", &self.get_full_status())
+    }
+}
+
+/* -------------------------------------------------------------
+ Helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Pads `number` on the left with zeros until it is `width` characters long.
+///
+/// A number that already takes `width` characters or more is returned as is.
+fn zerofill(number: i32, width: usize) -> String {
+    let digits = number.to_string();
+    let padding = "0".repeat(width.saturating_sub(digits.len()));
+
+    padding + &digits
+}
+
+/* -------------------------------------------------------------
+ Unit tests
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Checks zerofill against a table of (number, width, expected) cases.
+    #[test]
+    pub fn test_zerofill () {
+        let cases = [
+            // Test case 1: number is smaller than width (usual case)
+            (42, 5, "00042"),
+            // Test case 2: number is equal to width
+            (12345, 5, "12345"),
+            // Test case 3: number is larger than width
+            (987654, 4, "987654"),
+            // Test case 4: number is zero
+            (0, 3, "000"),
+            // Test case 5: width is zero
+            (987, 0, "987"),
+        ];
+
+        for (number, width, expected) in cases {
+            assert_eq!(zerofill(number, width), expected);
+        }
+    }
+
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Apr 21, 20:06 (12 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2592769
Default Alt Text
D3112.id8050.diff (11 KB)
Attached To
Mode
D3112: Fetch RFC index and format it
Attached
Detach File
Event Timeline
Log In to Comment