Page MenuHomeDevCentral

D3424.id8838.diff
No OneTemporary

D3424.id8838.diff

diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,10 +1,12 @@
[workspace]
members = [
+ "ds-http-client",
"fantoir-datasource",
"language-subtag-registry-datasource",
"rfc-datasource",
"opendatasoft-explore-api",
+ "sparql-client",
]
resolver = "2"
diff --git a/ds-http-client/Cargo.toml b/ds-http-client/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/ds-http-client/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "ds-http-client"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+lazy_static = "~1.5.0"
+
+[dependencies.reqwest]
+version = "~0.12.7"
+features = ["gzip", "deflate"]
+
+[dependencies.tokio]
+version = "~1.39.3"
+features = ["full"]
diff --git a/fantoir-datasource/src/services/http_client.rs b/ds-http-client/src/lib.rs
rename from fantoir-datasource/src/services/http_client.rs
rename to ds-http-client/src/lib.rs
--- a/fantoir-datasource/src/services/http_client.rs
+++ b/ds-http-client/src/lib.rs
@@ -1,3 +1,9 @@
+//! # HTTP client
+//!
+//! High-level interface to Hyper/reqwest HTTP client.
+//!
+//! This library is optimized to work with Nasqueron Datasources components.
+
use std::io::Error as IOError;
use std::path::Path;
@@ -7,7 +13,7 @@
use reqwest::Error as ReqwestError;
use reqwest::IntoUrl;
use reqwest::Response;
-use reqwest::header::HeaderMap;
+use reqwest::header::{HeaderMap, HeaderValue};
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
@@ -19,11 +25,12 @@
lazy_static! {
pub static ref USER_AGENT: String = format!(
- "{}/{} (https://databases.nasqueron.org/)",
+ "{}/{}",
env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
);
}
+/// Gets the default user agent
pub fn get_user_agent () -> &'static str {
&USER_AGENT
}
@@ -32,18 +39,15 @@
HTTP client
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+/// HTTP client
pub struct Client {
client: ReqwestClient,
}
impl Client {
pub fn new(headers: Option<HeaderMap>) -> Self {
- let headers = headers
- .unwrap_or(HeaderMap::new());
-
let client = ClientBuilder::new()
- .user_agent(get_user_agent())
- .default_headers(headers)
+ .default_headers(build_headers(headers))
.gzip(true)
.deflate(true)
.build()
@@ -55,7 +59,7 @@
}
pub async fn get<T>(&self, url: T) -> Result<Response, Error>
- where T: IntoUrl {
+ where T: IntoUrl {
self.client
.get(url)
.send()
@@ -64,7 +68,7 @@
}
pub async fn download<P, T>(&self, url: T, target_path: P) -> Result<usize, Error>
- where T: IntoUrl, P: AsRef<Path> {
+ where T: IntoUrl, P: AsRef<Path> {
let mut file = File::create(target_path)
.await
.map_err(|error| Error::IO(error))?;
@@ -76,15 +80,31 @@
.await
.map_err(|error| Error::Reqwest(error))?
{
- bytes_read += file.write(chunk.as_ref())
- .await
- .map_err(|error| Error::IO(error))?;
+ bytes_read += file.write(chunk.as_ref())
+ .await
+ .map_err(|error| Error::IO(error))?;
}
Ok(bytes_read)
}
}
+/* -------------------------------------------------------------
+ HTTP client utilities
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Completes the default headers to use for every request.
+///
+/// Starts from `headers`, or from an empty map when `None` is given, and adds
+/// a `User-Agent` entry set to [`get_user_agent`] unless one is already there.
+pub fn build_headers(headers: Option<HeaderMap>) -> HeaderMap {
+    let mut default_headers = headers.unwrap_or_default();
+
+    // RFC 7231 states User-Agent header SHOULD be sent.
+    default_headers
+        .entry("User-Agent")
+        .or_insert_with(|| HeaderValue::from_static(get_user_agent()));
+
+    default_headers
+}
+
/* -------------------------------------------------------------
HTTP client error
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "fantoir-datasource"
-version = "0.1.1"
+version = "0.2.0"
edition = "2021"
description = "Generates a Postgres table from FANTOIR raw file"
authors = [
@@ -10,11 +10,12 @@
[dependencies]
chrono = "~0.4.38"
+ds-http-client = { version = "0.1.0", path = "../ds-http-client" }
lazy_static = "~1.5.0"
opendatasoft-explore-api = { version = "0.1.1", path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.7"
regex = "~1.10.6"
-sparesults = "~0.1.8"
+sparql-client = { version = "0.1.0", path = "../sparql-client" }
[dependencies.async-scoped]
version = "~0.9.0"
@@ -24,10 +25,6 @@
version = "~4.5.16"
features = ["derive"]
-[dependencies.reqwest]
-version = "~0.11.18"
-features = ["gzip", "deflate"]
-
[dependencies.sqlx]
version = "~0.8.1"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/fantoir-datasource/src/commands/fetch/mod.rs b/fantoir-datasource/src/commands/fetch/mod.rs
--- a/fantoir-datasource/src/commands/fetch/mod.rs
+++ b/fantoir-datasource/src/commands/fetch/mod.rs
@@ -7,13 +7,13 @@
use std::process::exit;
use chrono::Utc;
+use ds_http_client::Client as HttpClient;
use tokio::fs::remove_file;
use opendatasoft_explore_api::requests::ExploreApiEndPoint;
use tokio::process::Command;
use crate::commands::fetch::fantoir_file::FantoirFile;
use crate::commands::fetch::os::is_command_available;
-use crate::services::http_client::Client as HttpClient;
mod fantoir_file;
mod os;
diff --git a/fantoir-datasource/src/commands/wikidata/mod.rs b/fantoir-datasource/src/commands/wikidata/mod.rs
--- a/fantoir-datasource/src/commands/wikidata/mod.rs
+++ b/fantoir-datasource/src/commands/wikidata/mod.rs
@@ -8,14 +8,14 @@
use oxrdf::Term;
use sqlx::PgPool;
-
+use sparql_client::{is_term_empty, parse_literal, parse_term_uri, ClientBuilder};
use crate::commands::wikidata::qualification::determine_p31_winner;
use crate::commands::wikidata::report::*;
use crate::db::*;
use crate::WikidataArgs;
use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
use crate::services::query::search_fantoir_code;
-use crate::services::sparql::*;
+use crate::services::user_agent::get_user_agent;
pub static WIKIDATA_TABLE: &'static str = "fantoir_wikidata";
pub static WIKIDATA_SPARQL_ENDPOINT: &'static str = "https://query.wikidata.org/sparql";
@@ -38,7 +38,10 @@
}
// Query Wikidata and get (Wikidata/FANTOIR code, list of P31 (instance of) values) hashmap
- let client = Client::new(WIKIDATA_SPARQL_ENDPOINT);
+ let client = ClientBuilder::new(WIKIDATA_SPARQL_ENDPOINT)
+ .with_user_agent(get_user_agent())
+ .build()
+ .expect("Can't build SPARQL client");
let mut what_map = HashMap::new();
client.query(include_str!("../../queries/wikidata.sparql"))
diff --git a/fantoir-datasource/src/services/mod.rs b/fantoir-datasource/src/services/mod.rs
--- a/fantoir-datasource/src/services/mod.rs
+++ b/fantoir-datasource/src/services/mod.rs
@@ -1,3 +1,2 @@
pub mod query;
-pub mod http_client;
-pub mod sparql;
+pub mod user_agent;
\ No newline at end of file
diff --git a/fantoir-datasource/src/services/user_agent.rs b/fantoir-datasource/src/services/user_agent.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/services/user_agent.rs
@@ -0,0 +1,14 @@
+//! Builds the user agent to use in HTTP requests from the crate name and version captured at build time.
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+    /// User agent of this crate, as `name/version (website)`.
+    pub static ref USER_AGENT: String = format!(
+        "{}/{} (https://databases.nasqueron.org/)",
+        env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+    );
+}
+
+/// Gets the user agent to send in HTTP requests
+pub fn get_user_agent () -> &'static str {
+    USER_AGENT.as_str()
+}
diff --git a/sparql-client/Cargo.toml b/sparql-client/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/sparql-client/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "sparql-client"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+ds-http-client = { version = "0.1.0", path = "../ds-http-client" }
+oxrdf = "~0.1.7"
+sparesults = "~0.1.8"
+lazy_static = "~1.5.0"
+tokio = "~1.39.3"
+
+[dependencies.reqwest]
+version = "~0.12.7"
+features = ["gzip", "deflate"]
diff --git a/fantoir-datasource/src/services/sparql.rs b/sparql-client/src/lib.rs
rename from fantoir-datasource/src/services/sparql.rs
rename to sparql-client/src/lib.rs
--- a/fantoir-datasource/src/services/sparql.rs
+++ b/sparql-client/src/lib.rs
@@ -3,39 +3,30 @@
use std::collections::HashMap;
use std::io::BufRead;
+use ds_http_client::Client as HttpClient;
+use lazy_static::lazy_static;
use oxrdf::Term;
use reqwest::header::{HeaderMap, HeaderValue};
use reqwest::Url;
use sparesults::*;
-use crate::services::http_client::Client as HttpClient;
-
type SparqlSolution = HashMap<String, Term>;
+/* -------------------------------------------------------------
+ SPARQL client
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
/// SPARQL client
pub struct Client {
pub endpoint: String,
http_client: HttpClient,
}
-/// Represent results for a SPARQL query
-/// A query can return a collection of solutions or a boolean.
-pub enum SparqlResults {
- /// Results for SELECT queries
- Solutions(Vec<SparqlSolution>),
-
- /// Results for INSERT DATA, UPDATE DATA, etc. queries
- Boolean(bool),
-}
-
impl Client {
pub fn new (endpoint: &str) -> Self {
- let mut headers = HeaderMap::new();
- headers.insert("Accept", HeaderValue::from_static("Accept: application/sparql-results+xml"));
-
Self {
endpoint: String::from(endpoint),
- http_client: HttpClient::new(Some(headers)),
+ http_client: build_default_http_client(None),
}
}
@@ -52,15 +43,86 @@
}
}
-pub fn parse_sparql_results (query_results: &str) -> SparqlResults {
- let results_reader = get_query_results_xml_reader(query_results.as_bytes());
+/* -------------------------------------------------------------
+ SPARQL client builder
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
- SparqlResults::read(results_reader)
+/// Builder for a SPARQL [`Client`].
+///
+/// Either a custom user agent or an already built HTTP client can be
+/// supplied, but not both: see [`SparqlClientBuildError`].
+pub struct ClientBuilder {
+    /// SPARQL endpoint the built client will query
+    pub endpoint: String,
+
+    /// HTTP client to use instead of the default one
+    http_client: Option<HttpClient>,
+
+    /// User agent to send with the default HTTP client
+    user_agent: Option<String>,
+}
+
+impl ClientBuilder {
+    /// Starts a builder for the specified SPARQL endpoint,
+    /// with no HTTP client and no user agent set.
+    pub fn new (endpoint: &str) -> Self {
+        Self {
+            endpoint: endpoint.to_string(),
+            http_client: None,
+            user_agent: None,
+        }
+    }
+
+    /// Uses the specified HTTP client instead of the default one.
+    ///
+    /// Cannot be combined with [`ClientBuilder::with_user_agent`].
+    pub fn with_http_client (mut self, client: HttpClient) -> Self {
+        self.http_client = Some(client);
+
+        self
+    }
+
+    /// Sets the user agent sent by the default HTTP client.
+    ///
+    /// Cannot be combined with [`ClientBuilder::with_http_client`].
+    pub fn with_user_agent (mut self, user_agent: &str) -> Self {
+        self.user_agent = Some(user_agent.to_string());
+
+        self
+    }
+
+    /// Builds the SPARQL client.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`SparqlClientBuildError::UserAgentAndHttpClientAreExclusive`]
+    /// when both a user agent and an HTTP client have been set.
+    pub fn build (self) -> Result<Client, SparqlClientBuildError> {
+        let http_client = match (self.http_client, self.user_agent) {
+            (Some(_), Some(_)) => {
+                return Err(SparqlClientBuildError::UserAgentAndHttpClientAreExclusive);
+            }
+            (Some(client), None) => client,
+            // The default HTTP client is only built when none was supplied.
+            (None, user_agent) => build_default_http_client(user_agent),
+        };
+
+        Ok(Client {
+            endpoint: self.endpoint,
+            http_client,
+        })
+    }
+}
+
+/// Builds the HTTP client used when the caller does not supply one.
+///
+/// The client asks for SPARQL results in XML, the format this crate parses,
+/// and identifies itself with `user_agent`, or with [`get_user_agent`] when
+/// `None` is given.
+///
+/// # Panics
+///
+/// Panics if `user_agent` is not a valid HTTP header value.
+fn build_default_http_client (user_agent: Option<String>) -> HttpClient {
+    let user_agent = user_agent.unwrap_or_else(|| get_user_agent().to_string());
+
+    let mut headers = HeaderMap::new();
+    // The value is the media type alone: the header name is passed separately,
+    // so repeating "Accept: " here would send "Accept: Accept: ...".
+    headers.insert("Accept", HeaderValue::from_static("application/sparql-results+xml"));
+    headers.insert(
+        "User-Agent",
+        HeaderValue::from_str(&user_agent)
+            .expect("User agent must be a valid HTTP header value"),
+    );
+
+    HttpClient::new(Some(headers))
+}
+
+/// Errors that can occur when building a SPARQL client
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SparqlClientBuildError {
+    /// You cannot define both user agent and HTTP client, as the HTTP client
+    /// does not provide a way to inject new headers once built.
+    UserAgentAndHttpClientAreExclusive,
+}
+
+impl std::fmt::Display for SparqlClientBuildError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::UserAgentAndHttpClientAreExclusive => f.write_str(
+                "a user agent and an HTTP client cannot both be set on the SPARQL client builder",
+            ),
+        }
+    }
+}
+
+// Lets callers propagate the error with `?` into `Box<dyn Error>` and similar.
+impl std::error::Error for SparqlClientBuildError {}
+
+/* -------------------------------------------------------------
+ SPARQL query results
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Represents the results of a SPARQL query.
+///
+/// A query can return a collection of solutions or a boolean.
+pub enum SparqlResults {
+ /// Results for SELECT queries
+Solutions(Vec<SparqlSolution>),
+
+ /// Results for INSERT DATA, UPDATE DATA, etc. queries
+ ///
+ /// NOTE(review): in the SPARQL results format, a boolean is the answer
+ /// to an ASK query; confirm the update queries listed above yield one.
+Boolean(bool),
+}
impl SparqlResults {
pub fn read<T>(reader: QueryResultsReader<T>) -> Self
- where T: BufRead
+ where T: BufRead
{
match reader {
QueryResultsReader::Solutions(solutions) => {
@@ -78,8 +140,14 @@
}
}
+/// Parses the response to a SPARQL query, expressed in the XML results format.
+///
+/// The parsed response is converted with [`SparqlResults::read`].
+pub fn parse_sparql_results (query_results: &str) -> SparqlResults {
+    SparqlResults::read(get_query_results_xml_reader(query_results.as_bytes()))
+}
+
fn get_query_results_xml_reader<T>(reader: T) -> QueryResultsReader<T>
- where T: BufRead
+where T: BufRead
{
QueryResultsParser::from_format(QueryResultsFormat::Xml)
.read_results(reader)
@@ -87,7 +155,7 @@
}
fn parse_sparql_solutions<T> (solutions: SolutionsReader<T>) -> Vec<SparqlSolution>
- where T: BufRead
+where T: BufRead
{
solutions
.map(|solution| {
@@ -124,6 +192,10 @@
}
}
+/* -------------------------------------------------------------
+ Helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
pub fn is_term_empty(term: &Term) -> bool {
match term {
Term::NamedNode(node) => {
@@ -134,3 +206,21 @@
Term::Literal(_) => false,
}
}
+
+/* -------------------------------------------------------------
+ User agent
+
+ USER_AGENT is built on first use from the crate name and version captured at build time.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static! {
+    /// Default user agent of this crate, as `name/version`.
+    pub static ref USER_AGENT: String = format!(
+        "{}/{}",
+        env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+    );
+}
+
+/// Gets the default user agent
+pub fn get_user_agent () -> &'static str {
+    USER_AGENT.as_str()
+}

File Metadata

Mime Type
text/plain
Expires
Fri, Dec 20, 12:16 (21 h, 21 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2308041
Default Alt Text
D3424.id8838.diff (13 KB)

Event Timeline