Page MenuHomeDevCentral

D2745.id6964.diff
No OneTemporary

D2745.id6964.diff

diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,4 +2,5 @@
members = [
"fantoir-datasource",
+ "opendatasoft-explore-api",
]
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -14,6 +14,12 @@
More information: [fantoir-datasource README](fantoir-datasource/README.md)
+### Opendatasoft Explore API client (opendatasoft-explore-api)
+
+The opendatasoft-explore-api crate allows you to query the Opendatasoft Explore API from Rust code.
+
+This API software is used, for example, by data.economie.gouv.fr for open data.
+
## Repository structure
The repository is structured in subdirectories for components.
diff --git a/opendatasoft-explore-api/Cargo.toml b/opendatasoft-explore-api/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/opendatasoft-explore-api/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "opendatasoft-explore-api"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bytes = "~1.3.0" # Keep in sync with reqwest
+chrono = { version = "~0.4", features = ["serde"] }
+reqwest = {version = "~0.11.13" }
+serde = "~1.0.152"
+serde_derive = "~1.0.152"
+serde_json = "~1.0.91"
+
+[dev-dependencies]
+tokio = { version = "~1.23.0", features = ["macros", "rt"] }
diff --git a/opendatasoft-explore-api/src/lib.rs b/opendatasoft-explore-api/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/opendatasoft-explore-api/src/lib.rs
@@ -0,0 +1,4 @@
+pub mod schema;
+pub mod requests;
+
+pub use reqwest::Response as ApiHttpResponse;
diff --git a/opendatasoft-explore-api/src/requests.rs b/opendatasoft-explore-api/src/requests.rs
new file mode 100644
--- /dev/null
+++ b/opendatasoft-explore-api/src/requests.rs
@@ -0,0 +1,267 @@
+use reqwest::Client;
+use serde::Deserialize;
+use crate::ApiHttpResponse;
+
+use crate::schema::*;
+
+static USER_AGENT: &str = concat!(
+ env!("CARGO_PKG_NAME"),
+ "/",
+ env!("CARGO_PKG_VERSION"),
+);
+
+pub struct ExploreApiEndPoint {
+ /// The Opendatasoft Explore API v2 server to use
+ pub url: String,
+
+ client: Client,
+}
+
+impl ExploreApiEndPoint {
+ pub fn new (url: &str) -> Self {
+ Self {
+ url: url.to_string(),
+ client: Client::builder()
+ .user_agent(USER_AGENT)
+ .build()
+ .expect("A HTTP client should be built"),
+ }
+ }
+
+ /* -------------------------------------------------------------
+ Part 1 - catalog
+
+ API to enumerate datasets
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+ /// Query /catalog/datasets
+ pub async fn get_datasets(&self) -> DatasetsCollection {
+ let url = self.get_url("/catalog/datasets");
+
+ self.fetch(url).await
+ }
+
+ /// Query /catalog/exports/{format}
+ ///
+ /// As the method returns the raw HTTP response, you can use
+ /// the chunk() method to get the next part of the export,
+ /// as a Bytes object from the bytes crate.
+ ///
+ /// Example:
+ /// ```
+ /// use opendatasoft_explore_api::requests::ExploreApiEndPoint;
+ ///
+ /// async fn print_catalog_rdf (api: ExploreApiEndPoint) {
+ /// let mut response = api.export_datasets_catalog("rdf").await;
+ ///
+ /// while let Some(chunk) = response.chunk().await.unwrap() {
+ /// let bytes = chunk.to_vec(); // Vec<u8>
+ /// let text = String::from_utf8(bytes).expect("Not a valid UTF-8 bytes sequence");
+ ///
+ /// print!("{}", text);
+ /// }
+ /// println!();
+ /// }
+ /// ```
+ pub async fn export_datasets_catalog(&self, format: &str) -> ApiHttpResponse {
+ let url = self
+ .get_url("/catalog/exports/?")
+ .replace("?", format);
+
+ self.fetch_resource(url).await
+ }
+
+ /// Query /catalog/facets
+ pub async fn get_facets(&self) -> FacetsCollection {
+ let url = self.get_url("/catalog/facets");
+
+ self.fetch(url).await
+ }
+
+ /* -------------------------------------------------------------
+ Part 2 - datasets
+
+ API to work on records
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+ /// Query /catalog/datasets/{dataset_id}/records
+ pub async fn get_dataset_records(&self, dataset_id: &str) -> Results {
+ let url = self
+ .get_url("/catalog/datasets/?/records")
+ .replace("?", dataset_id);
+
+ self.fetch(url).await
+ }
+
+ /// Query /catalog/datasets/{dataset_id}/exports/{format}
+ pub async fn export_dataset(&self, dataset_id: &str, format: &str) -> ApiHttpResponse {
+ let url = self
+ .get_url("/catalog/datasets/:id/exports/:format")
+ .replace(":id", dataset_id)
+ .replace(":format", format);
+
+ self.fetch_resource(url).await
+ }
+
+ /// Query /catalog/datasets/{dataset_id}
+ pub async fn get_dataset_information(&self, dataset_id: &str) -> Dataset {
+ let mut url = self.get_url("/catalog/datasets/");
+ url.push_str(dataset_id);
+
+ self.fetch(url).await
+ }
+
+ /// Query /catalog/datasets/{dataset_id}/facets
+ pub async fn get_dataset_facets(&self, dataset_id: &str) -> FacetsCollection {
+ let url = self
+ .get_url("/catalog/datasets/?/facets")
+ .replace("?", dataset_id);
+
+ self.fetch(url).await
+ }
+
+ /// Query /catalog/datasets/{dataset_id}/attachments
+ pub async fn get_dataset_attachments(&self, dataset_id: &str) -> AttachmentCollection {
+ let url = self
+ .get_url("/catalog/datasets/?/attachments")
+ .replace("?", dataset_id);
+
+ self.fetch(url).await
+ }
+
+ /// Query /catalog/datasets/{dataset_id}/records/{record_id}
+ pub async fn get_dataset_record(&self, dataset_id: &str, record_id: &str) -> Record {
+ let url = self
+ .get_url("/catalog/datasets/:id/records/:record")
+ .replace(":id", dataset_id)
+ .replace(":record", record_id);
+
+ self.fetch(url).await
+ }
+
+ /* -------------------------------------------------------------
+ Helper methods
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+ fn get_url (&self, method: &str) -> String {
+ format!("{}{}", self.url, method)
+ }
+
+ async fn fetch_resource (&self, url: String) -> ApiHttpResponse {
+ self.client.get(url)
+ .send().await
+ .expect("Can't fetch API URL")
+ }
+
+ async fn fetch<T> (&self, url: String) -> T where for<'a> T: Deserialize<'a> {
+ let body = self.fetch_resource(url).await
+ .text().await
+ .expect("Can't get HTTP response content");
+
+ serde_json::from_str(&body)
+ .expect("HTTP response should be a valid dataset, can't parse it.")
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ static TEST_URL: &'static str = "https://data.economie.gouv.fr/api/v2";
+ static TEST_DATASET_ID: &'static str = "fichier-fantoir-des-voies-et-lieux-dits";
+
+ static TEST_DATASET_WITH_RECORDS_ID: &'static str = "controle_techn";
+ static TEST_RECORD_ID: &'static str = "eb04cba18e872814448a7fda829f3f1918cfae0b";
+
+ #[test]
+ fn test_get_url () {
+ let endpoint = ExploreApiEndPoint::new("https://foo");
+ assert_eq!("https://foo/bar", endpoint.get_url("/bar"));
+ assert_eq!("https://foo", endpoint.get_url(""));
+ }
+
+ #[tokio::test]
+ async fn test_get_datasets () {
+ let endpoint = ExploreApiEndPoint::new(TEST_URL);
+ let catalog = endpoint.get_datasets().await;
+
+ assert!(catalog.links[0].href.starts_with(TEST_URL));
+ }
+
+ #[tokio::test]
+ async fn test_get_facets () {
+ let endpoint = ExploreApiEndPoint::new(TEST_URL);
+ let facets = endpoint.get_facets().await;
+
+ assert!(facets.links[0].href.starts_with(TEST_URL));
+ }
+
+ #[tokio::test]
+ async fn test_get_dataset_information () {
+ let dataset = ExploreApiEndPoint::new(TEST_URL)
+ .get_dataset_information(TEST_DATASET_ID)
+ .await;
+
+ assert_eq!(TEST_DATASET_ID, dataset.dataset.dataset_id);
+ }
+
+ #[tokio::test]
+ async fn test_get_dataset_attachments () {
+ let attachments = ExploreApiEndPoint::new(TEST_URL)
+ .get_dataset_attachments(TEST_DATASET_ID)
+ .await;
+
+ assert!(attachments.attachments[0].metas.url.starts_with("odsfile://"));
+ }
+
+ #[tokio::test]
+ async fn test_get_dataset_facets () {
+ let facets = ExploreApiEndPoint::new(TEST_URL)
+ .get_dataset_facets(TEST_DATASET_ID)
+ .await;
+
+ assert!(facets.links[0].href.starts_with(TEST_URL));
+ }
+
+ #[tokio::test]
+ async fn test_export_datasets_catalog () {
+ let mut response = ExploreApiEndPoint::new(TEST_URL)
+ .export_datasets_catalog("rdf")
+ .await;
+
+ let mut rdf_about_found = false;
+ while let Some(chunk) = response.chunk().await.unwrap() {
+ let part = String::from_utf8(chunk.to_vec()).unwrap();
+ if part.contains("rdf:about") {
+ rdf_about_found = true;
+ break;
+ }
+ }
+
+ assert!(rdf_about_found);
+ }
+
+ #[tokio::test]
+ async fn test_get_dataset_records () {
+ let results = ExploreApiEndPoint::new(TEST_URL)
+ .get_dataset_records(TEST_DATASET_WITH_RECORDS_ID)
+ .await;
+
+ let link = match &results.records[0] {
+ ResultsRecord::Aggregation(_) => unreachable!(),
+ ResultsRecord::Record(record) => &record.links[0],
+ };
+
+ assert!(link.href.starts_with(TEST_URL));
+ assert!(link.href.contains(TEST_DATASET_WITH_RECORDS_ID));
+ }
+
+ #[tokio::test]
+ async fn test_get_dataset_record () {
+ let record = ExploreApiEndPoint::new(TEST_URL)
+ .get_dataset_record(TEST_DATASET_WITH_RECORDS_ID, TEST_RECORD_ID)
+ .await;
+
+ assert_eq!(TEST_RECORD_ID, record.record.id);
+ }
+}
diff --git a/opendatasoft-explore-api/src/schema.rs b/opendatasoft-explore-api/src/schema.rs
new file mode 100644
--- /dev/null
+++ b/opendatasoft-explore-api/src/schema.rs
@@ -0,0 +1,170 @@
+//! Schema for Opendatasoft Explore API v2
+
+use chrono::{DateTime, Utc};
+use serde_derive::Deserialize;
+use serde_derive::Serialize;
+use serde_json::Value as JsonValue;
+
+/* -------------------------------------------------------------
+ links
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct Link {
+ pub href: String,
+ pub rel: String,
+}
+
+/* -------------------------------------------------------------
+ dataset
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct Dataset {
+ pub links: Vec<Link>,
+ pub dataset: DatasetProperties,
+}
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct DatasetProperties {
+ pub dataset_id: String,
+ pub dataset_uid: String,
+ pub attachments: Vec<AttachmentProperties>,
+ pub has_records: bool,
+ pub data_visible: bool,
+ /// A map of available features for a dataset, with the fields they apply to. NOTE(review): typed below as a list of strings — confirm against the API schema.
+ pub features: Vec<String>,
+ pub metas: JsonValue,
+ pub fields: Vec<DatasetField>,
+ #[serde(rename = "additionalProperties", default, skip_serializing_if = "Option::is_none")]
+ pub additional_properties: Option<JsonValue>,
+}
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct DatasetField {
+ pub name: String,
+ pub label: String,
+ #[serde(rename = "type")]
+ pub field_type: String,
+ pub annotations: JsonValue,
+ #[serde(rename = "description", skip_serializing_if = "Option::is_none")]
+ pub description: Option<String>,
+}
+
+/* -------------------------------------------------------------
+ results_dataset
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct DatasetsCollection {
+ pub total_count: usize,
+ pub links: Vec<Link>,
+ pub datasets: Vec<Dataset>,
+}
+
+/* -------------------------------------------------------------
+ facet_value_enumeration
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct FacetValueEnumeration {
+ pub name: String,
+ pub count: usize,
+ pub value: String,
+ pub state: String,
+}
+
+/* -------------------------------------------------------------
+ facet_enumeration
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct FacetEnumeration {
+ pub name: String,
+ pub facets: Vec<FacetValueEnumeration>,
+}
+
+/* -------------------------------------------------------------
+ aggregation
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct Aggregation {
+ pub count: usize,
+ pub cou_name_en: String,
+}
+
+/* -------------------------------------------------------------
+ record
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct Record {
+ pub record: RecordProperties,
+ pub links: Vec<Link>,
+}
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct RecordProperties {
+ pub id: String,
+ pub timestamp: DateTime<Utc>,
+ pub size: usize,
+ pub fields: JsonValue,
+}
+
+/* -------------------------------------------------------------
+ results
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct Results {
+ pub total_count: usize,
+ pub links: Vec<Link>,
+ pub records: Vec<ResultsRecord>,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum ResultsRecord {
+ Aggregation(Aggregation),
+ Record(Record),
+}
+
+/* -------------------------------------------------------------
+ attachment
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct Attachment {
+ pub href: String,
+ pub metas: AttachmentProperties,
+}
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct AttachmentProperties {
+ #[serde(rename = "mime-type", default, skip_serializing_if = "Option::is_none")]
+ pub mime_type: Option<String>,
+ pub title: String,
+ pub url: String,
+ pub id: String,
+}
+
+/* -------------------------------------------------------------
+ Response to /catalog/datasets/{dataset_id}/attachments
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct AttachmentCollection {
+ pub links: Vec<Link>,
+ pub attachments: Vec<Attachment>,
+}
+
+/* -------------------------------------------------------------
+ Response to /catalog/facets
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]
+pub struct FacetsCollection {
+ pub links: Vec<Link>,
+ pub facets: Vec<FacetEnumeration>,
+}

File Metadata

Mime Type
text/plain
Expires
Tue, Jun 17, 02:32 (1 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2739524
Default Alt Text
D2745.id6964.diff (15 KB)

Event Timeline