Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3913903
D3424.id8838.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Referenced Files
None
Subscribers
None
D3424.id8838.diff
View Options
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,10 +1,12 @@
[workspace]
members = [
+ "ds-http-client",
"fantoir-datasource",
"language-subtag-registry-datasource",
"rfc-datasource",
"opendatasoft-explore-api",
+ "sparql-client",
]
resolver = "2"
diff --git a/ds-http-client/Cargo.toml b/ds-http-client/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/ds-http-client/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "ds-http-client"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+lazy_static = "~1.5.0"
+
+[dependencies.reqwest]
+version = "~0.12.7"
+features = ["gzip", "deflate"]
+
+[dependencies.tokio]
+version = "~1.39.3"
+features = ["full"]
diff --git a/fantoir-datasource/src/services/http_client.rs b/ds-http-client/src/lib.rs
rename from fantoir-datasource/src/services/http_client.rs
rename to ds-http-client/src/lib.rs
--- a/fantoir-datasource/src/services/http_client.rs
+++ b/ds-http-client/src/lib.rs
@@ -1,3 +1,9 @@
+//! # HTTP client
+//!
+//! High-level interface to Hyper/reqwest HTTP client.
+//!
+//! This library is optimized to work with Nasqueron Datasources components.
+
use std::io::Error as IOError;
use std::path::Path;
@@ -7,7 +13,7 @@
use reqwest::Error as ReqwestError;
use reqwest::IntoUrl;
use reqwest::Response;
-use reqwest::header::HeaderMap;
+use reqwest::header::{HeaderMap, HeaderValue};
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
@@ -19,11 +25,12 @@
lazy_static! {
pub static ref USER_AGENT: String = format!(
- "{}/{} (https://databases.nasqueron.org/)",
+ "{}/{}",
env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
);
}
+/// Gets the default user agent
pub fn get_user_agent () -> &'static str {
&USER_AGENT
}
@@ -32,18 +39,15 @@
HTTP client
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+/// HTTP client
pub struct Client {
client: ReqwestClient,
}
impl Client {
pub fn new(headers: Option<HeaderMap>) -> Self {
- let headers = headers
- .unwrap_or(HeaderMap::new());
-
let client = ClientBuilder::new()
- .user_agent(get_user_agent())
- .default_headers(headers)
+ .default_headers(build_headers(headers))
.gzip(true)
.deflate(true)
.build()
@@ -55,7 +59,7 @@
}
pub async fn get<T>(&self, url: T) -> Result<Response, Error>
- where T: IntoUrl {
+ where T: IntoUrl {
self.client
.get(url)
.send()
@@ -64,7 +68,7 @@
}
pub async fn download<P, T>(&self, url: T, target_path: P) -> Result<usize, Error>
- where T: IntoUrl, P: AsRef<Path> {
+ where T: IntoUrl, P: AsRef<Path> {
let mut file = File::create(target_path)
.await
.map_err(|error| Error::IO(error))?;
@@ -76,15 +80,31 @@
.await
.map_err(|error| Error::Reqwest(error))?
{
- bytes_read += file.write(chunk.as_ref())
- .await
- .map_err(|error| Error::IO(error))?;
+ bytes_read += file.write(chunk.as_ref())
+ .await
+ .map_err(|error| Error::IO(error))?;
}
Ok(bytes_read)
}
}
+/* -------------------------------------------------------------
+ HTTP client utilities
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Returns `headers` (or an empty map if `None`), adding the default User-Agent when absent.
+pub fn build_headers(headers: Option<HeaderMap>) -> HeaderMap {
+    let mut headers = headers.unwrap_or_default();
+
+    // RFC 7231 states User-Agent header SHOULD be sent.
+    if !headers.contains_key("User-Agent") {
+        headers.insert("User-Agent", HeaderValue::from_static(get_user_agent()));
+    }
+
+    headers
+}
+
/* -------------------------------------------------------------
HTTP client error
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
diff --git a/fantoir-datasource/Cargo.toml b/fantoir-datasource/Cargo.toml
--- a/fantoir-datasource/Cargo.toml
+++ b/fantoir-datasource/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "fantoir-datasource"
-version = "0.1.1"
+version = "0.2.0"
edition = "2021"
description = "Generates a Postgres table from FANTOIR raw file"
authors = [
@@ -10,11 +10,12 @@
[dependencies]
chrono = "~0.4.38"
+ds-http-client = { version = "0.1.0", path = "../ds-http-client" }
lazy_static = "~1.5.0"
opendatasoft-explore-api = { version = "0.1.1", path = "../opendatasoft-explore-api" }
oxrdf = "~0.1.7"
regex = "~1.10.6"
-sparesults = "~0.1.8"
+sparql-client = { version = "0.1.0", path = "../sparql-client" }
[dependencies.async-scoped]
version = "~0.9.0"
@@ -24,10 +25,6 @@
version = "~4.5.16"
features = ["derive"]
-[dependencies.reqwest]
-version = "~0.11.18"
-features = ["gzip", "deflate"]
-
[dependencies.sqlx]
version = "~0.8.1"
features = ["runtime-tokio-native-tls", "postgres", "chrono"]
diff --git a/fantoir-datasource/src/commands/fetch/mod.rs b/fantoir-datasource/src/commands/fetch/mod.rs
--- a/fantoir-datasource/src/commands/fetch/mod.rs
+++ b/fantoir-datasource/src/commands/fetch/mod.rs
@@ -7,13 +7,13 @@
use std::process::exit;
use chrono::Utc;
+use ds_http_client::Client as HttpClient;
use tokio::fs::remove_file;
use opendatasoft_explore_api::requests::ExploreApiEndPoint;
use tokio::process::Command;
use crate::commands::fetch::fantoir_file::FantoirFile;
use crate::commands::fetch::os::is_command_available;
-use crate::services::http_client::Client as HttpClient;
mod fantoir_file;
mod os;
diff --git a/fantoir-datasource/src/commands/wikidata/mod.rs b/fantoir-datasource/src/commands/wikidata/mod.rs
--- a/fantoir-datasource/src/commands/wikidata/mod.rs
+++ b/fantoir-datasource/src/commands/wikidata/mod.rs
@@ -8,14 +8,14 @@
use oxrdf::Term;
use sqlx::PgPool;
-
+use sparql_client::{is_term_empty, parse_literal, parse_term_uri, ClientBuilder};
use crate::commands::wikidata::qualification::determine_p31_winner;
use crate::commands::wikidata::report::*;
use crate::db::*;
use crate::WikidataArgs;
use crate::fantoir::{fix_fantoir_code, FixedFantoirCode};
use crate::services::query::search_fantoir_code;
-use crate::services::sparql::*;
+use crate::services::user_agent::get_user_agent;
pub static WIKIDATA_TABLE: &'static str = "fantoir_wikidata";
pub static WIKIDATA_SPARQL_ENDPOINT: &'static str = "https://query.wikidata.org/sparql";
@@ -38,7 +38,10 @@
}
// Query Wikidata and get (Wikidata/FANTOIR code, list of P31 (instance of) values) hashmap
- let client = Client::new(WIKIDATA_SPARQL_ENDPOINT);
+ let client = ClientBuilder::new(WIKIDATA_SPARQL_ENDPOINT)
+ .with_user_agent(get_user_agent())
+ .build()
+ .expect("Can't build SPARQL client");
let mut what_map = HashMap::new();
client.query(include_str!("../../queries/wikidata.sparql"))
diff --git a/fantoir-datasource/src/services/mod.rs b/fantoir-datasource/src/services/mod.rs
--- a/fantoir-datasource/src/services/mod.rs
+++ b/fantoir-datasource/src/services/mod.rs
@@ -1,3 +1,2 @@
pub mod query;
-pub mod http_client;
-pub mod sparql;
+pub mod user_agent;
\ No newline at end of file
diff --git a/fantoir-datasource/src/services/user_agent.rs b/fantoir-datasource/src/services/user_agent.rs
new file mode 100644
--- /dev/null
+++ b/fantoir-datasource/src/services/user_agent.rs
@@ -0,0 +1,14 @@
+//! User agent for HTTP requests, from the crate name and version captured at build time.
+
+use lazy_static::lazy_static;
+
+lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{} (https://databases.nasqueron.org/)",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+pub fn get_user_agent () -> &'static str {
+ &USER_AGENT
+}
diff --git a/sparql-client/Cargo.toml b/sparql-client/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/sparql-client/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "sparql-client"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+ds-http-client = { version = "0.1.0", path = "../ds-http-client" }
+oxrdf = "~0.1.7"
+sparesults = "~0.1.8"
+lazy_static = "~1.5.0"
+tokio = "~1.39.3"
+
+[dependencies.reqwest]
+version = "~0.12.7"
+features = ["gzip", "deflate"]
diff --git a/fantoir-datasource/src/services/sparql.rs b/sparql-client/src/lib.rs
rename from fantoir-datasource/src/services/sparql.rs
rename to sparql-client/src/lib.rs
--- a/fantoir-datasource/src/services/sparql.rs
+++ b/sparql-client/src/lib.rs
@@ -3,39 +3,30 @@
use std::collections::HashMap;
use std::io::BufRead;
+use ds_http_client::Client as HttpClient;
+use lazy_static::lazy_static;
use oxrdf::Term;
use reqwest::header::{HeaderMap, HeaderValue};
use reqwest::Url;
use sparesults::*;
-use crate::services::http_client::Client as HttpClient;
-
type SparqlSolution = HashMap<String, Term>;
+/* -------------------------------------------------------------
+ SPARQL client
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
/// SPARQL client
pub struct Client {
pub endpoint: String,
http_client: HttpClient,
}
-/// Represent results for a SPARQL query
-/// A query can return a collection of solutions or a boolean.
-pub enum SparqlResults {
- /// Results for SELECT queries
- Solutions(Vec<SparqlSolution>),
-
- /// Results for INSERT DATA, UPDATE DATA, etc. queries
- Boolean(bool),
-}
-
impl Client {
pub fn new (endpoint: &str) -> Self {
- let mut headers = HeaderMap::new();
- headers.insert("Accept", HeaderValue::from_static("Accept: application/sparql-results+xml"));
-
Self {
endpoint: String::from(endpoint),
- http_client: HttpClient::new(Some(headers)),
+ http_client: build_default_http_client(None),
}
}
@@ -52,15 +43,86 @@
}
}
-pub fn parse_sparql_results (query_results: &str) -> SparqlResults {
- let results_reader = get_query_results_xml_reader(query_results.as_bytes());
+/* -------------------------------------------------------------
+ SPARQL client builder
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
- SparqlResults::read(results_reader)
+/// Builder for a SPARQL [`Client`], with an optional user agent or HTTP client.
+pub struct ClientBuilder {
+    pub endpoint: String,
+    http_client: Option<HttpClient>,
+    user_agent: Option<String>,
+}
+
+impl ClientBuilder {
+    /// Starts a builder for `endpoint`, with no HTTP client or user agent set.
+    pub fn new (endpoint: &str) -> Self {
+        Self {
+            endpoint: endpoint.to_string(),
+            http_client: None,
+            user_agent: None,
+        }
+    }
+
+    /// Uses `client` for requests instead of the default HTTP client.
+    pub fn with_http_client (self, client: HttpClient) -> Self {
+        Self { http_client: Some(client), ..self }
+    }
+
+    /// Sends `user_agent` as User-Agent from the default HTTP client.
+    pub fn with_user_agent (self, user_agent: &str) -> Self {
+        Self { user_agent: Some(user_agent.to_string()), ..self }
+    }
+
+    /// Builds the client; fails if both a user agent and an HTTP client were set.
+    pub fn build (self) -> Result<Client, SparqlClientBuildError> {
+        if self.user_agent.is_some() && self.http_client.is_some() {
+            return Err(SparqlClientBuildError::UserAgentAndHttpClientAreExclusive);
+        }
+
+        // Deferred: the default client is only built when none was supplied.
+        let user_agent = self.user_agent;
+        let http_client = self.http_client
+            .unwrap_or_else(|| build_default_http_client(user_agent));
+
+        Ok(Client { endpoint: self.endpoint, http_client })
+    }
+}
+
+/// Builds the default HTTP client; panics if the user agent is not a valid header value.
+fn build_default_http_client (user_agent: Option<String>) -> HttpClient {
+    let user_agent = user_agent.unwrap_or_else(|| get_user_agent().to_string());
+    let user_agent = HeaderValue::from_str(&user_agent).expect("User agent must be a valid HTTP header value");
+    let mut headers = HeaderMap::new();
+    // Header value only: repeating the "Accept: " name here would send a malformed media type.
+    headers.insert("Accept", HeaderValue::from_static("application/sparql-results+xml"));
+    headers.insert("User-Agent", user_agent);
+    HttpClient::new(Some(headers))
+}
+
+/// Error returned by `ClientBuilder::build`.
+#[derive(Debug)]
+pub enum SparqlClientBuildError {
+ /// Both user agent and HTTP client were set; a built HTTP client can't take new headers.
+ UserAgentAndHttpClientAreExclusive,
+}
+
+/* -------------------------------------------------------------
+ SPARQL query results
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+/// Represents the results of a SPARQL query.
+/// A query returns either a collection of solutions or a boolean.
+pub enum SparqlResults {
+ /// Results for SELECT queries
+ Solutions(Vec<SparqlSolution>),
+
+ /// Boolean result, as returned for instance by ASK queries
+ Boolean(bool),
+}
impl SparqlResults {
pub fn read<T>(reader: QueryResultsReader<T>) -> Self
- where T: BufRead
+ where T: BufRead
{
match reader {
QueryResultsReader::Solutions(solutions) => {
@@ -78,8 +140,14 @@
}
}
+/// Parses a SPARQL XML results document into [`SparqlResults`].
+pub fn parse_sparql_results (query_results: &str) -> SparqlResults {
+    let xml_reader = get_query_results_xml_reader(query_results.as_bytes());
+    SparqlResults::read(xml_reader)
+}
+
fn get_query_results_xml_reader<T>(reader: T) -> QueryResultsReader<T>
- where T: BufRead
+where T: BufRead
{
QueryResultsParser::from_format(QueryResultsFormat::Xml)
.read_results(reader)
@@ -87,7 +155,7 @@
}
fn parse_sparql_solutions<T> (solutions: SolutionsReader<T>) -> Vec<SparqlSolution>
- where T: BufRead
+where T: BufRead
{
solutions
.map(|solution| {
@@ -124,6 +192,10 @@
}
}
+/* -------------------------------------------------------------
+ Helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
pub fn is_term_empty(term: &Term) -> bool {
match term {
Term::NamedNode(node) => {
@@ -134,3 +206,21 @@
Term::Literal(_) => false,
}
}
+
+/* -------------------------------------------------------------
+ User agent
+
+ The crate name and version in USER_AGENT are captured at build time.
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+lazy_static! {
+ pub static ref USER_AGENT: String = format!(
+ "{}/{}",
+ env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")
+ );
+}
+
+/// Gets the default user agent, formatted as `<crate name>/<crate version>`
+pub fn get_user_agent () -> &'static str {
+ &USER_AGENT
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Dec 20, 12:16 (21 h, 21 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2308041
Default Alt Text
D3424.id8838.diff (13 KB)
Attached To
Mode
D3424: Extract SPARQL client
Attached
Detach File
Event Timeline
Log In to Comment