JSON Schemas

Query

Schema of the JSON object expected by the POST /search endpoint.

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://gitlab.com/datadrivendiscovery/datamart-api/query_input_schema.json",
  "title": "Query",
  "description": "JSON object that specifies queries for searching datasets in DataMart.",
  "type": "object",
  "definitions": {
    "temporal_variable": {
      "type": "object",
      "description": "Describes columns containing temporal information.",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "temporal_variable"
          ]
        },
        "start": {
          "type": "string",
          "description": "Requested dates are more recent than this date."
        },
        "end": {
          "type": "string",
          "description": "Requested dates are older than this date."
        },
        "granularity": {
          "type": "string",
          "description": "Requested dates should match the requested granularity. For example, if 'day' is requested, the best match is a dataset with dates; however a dataset with hours is relevant too as hourly data can be aggregated into days.",
          "enum": [
            "year",
            "month",
            "day",
            "hour",
            "second"
          ]
        }
      },
      "required": [
        "type"
      ]
    },
    "geospatial_variable": {
      "type": "object",
      "description": "Describes columns containing geospatial entities.",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "geospatial_variable"
          ]
        },
        "latitude1": {
          "type": "number",
          "description": "The latitude of the top left point."
        },
        "longitude1": {
          "type": "number",
          "description": "The longitude of the top left point."
        },
        "latitude2": {
          "type": "number",
          "description": "The latitude of the bottom right point."
        },
        "longitude2": {
          "type": "number",
          "description": "The longitude of the bottom right point."
        },
        "granularity": {
          "type": "string",
          "description": "The granularity of the entities contained in a bounding box.",
          "enum": [
            "country",
            "state",
            "city",
            "county",
            "postal_code"
          ]
        }
      },
      "required": [
        "type"
      ]
    },
    "tabular_variable": {
      "type": "object",
      "description": "Describes columns that a matching dataset should have in terms of columns of the supplied dataset.",
      "properties": {
        "type": {
          "type": "string",
          "description": "Discriminator for this kind of variable. 'tabular_variable' matches the name of this definition; 'dataframe_variable', the only value accepted previously, remains valid so existing queries keep working.",
          "enum": [
            "tabular_variable",
            "dataframe_variable"
          ]
        },
        "columns": {
          "type": "array",
          "description": "A set of indices that identifies a set of columns in the supplied dataset. When multiple indices are provided, the matching dataset should contain columns corresponding to each of the given columns.",
          "items": {
            "type": "integer"
          }
        },
        "relationship": {
          "type": "string",
          "description": "The relationship between a column in the supplied dataset and a column in a matching dataset. The default is 'contains'.",
          "enum": [
            "contains",
            "similar",
            "correlated",
            "anti-correlated",
            "mutually-informative",
            "mutually-uninformative"
          ]
        }
      },
      "required": [
        "type"
      ]
    },
    "named_entity_variable": {
      "type": "object",
      "description": "Describes a set of named entities that a matching dataset must contain.",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "named_entity_variable"
          ]
        },
        "entities": {
          "type": "array",
          "description": "A set of entity names. A matching dataset should contain a column with the requested names.",
          "items": {
            "type": "string"
          }
        }
      },
      "required": [
        "type"
      ]
    }
  },
  "properties": {
    "keywords": {
      "type": "array",
      "description": "Keywords that match a dataset. The keywords can be matched against the dataset title, dataset description, dataset column names, etc.",
      "items": {
        "type": "string"
      }
    },
    "variables": {
      "type": "array",
      "description": "Describes a set of features (variables) that a matching dataset must have. Datasets with more features will be ranked higher.",
      "items": {
        "oneOf": [
          {
            "$ref": "#/definitions/temporal_variable"
          },
          {
            "$ref": "#/definitions/geospatial_variable"
          },
          {
            "$ref": "#/definitions/tabular_variable"
          },
          {
            "$ref": "#/definitions/named_entity_variable"
          }
        ]
      }
    }
  }
}

Result schema

Description of a dataset, such as a search result. The POST /search endpoint returns an array of these objects; they are also what you pass to datamart_materialize.download().

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "https://gitlab.com/datadrivendiscovery/datamart-api/query_result_schema.json",
  "title": "Result",
  "definitions": {
    "augmentation_unit": {
      "type": "array",
      "description": "An array of column identifiers that represents a unit for the augmentation. In the simplest case, we have a single identifier, and in more complex cases, we have multiple identifiers (e.g.: columns 'latitude' and 'longitude' could be combined to join and/or concatenate with column 'country')."
    }
  },
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "The dataset identifier provided by DataMart."
    },
    "score": {
      "type": "number",
      "minimum": 0,
      "description": "A non-negative number that represents the relevance of this dataset to the query. Larger scores indicate better matches. Scores across different DataMart systems are not comparable."
    },
    "metadata": {
      "type": "object",
      "description": "The metadata associated with the dataset. Metadata from different DataMart systems can have different schemas.",
      "properties": {
        "name": {
          "type": "string"
        },
        "description": {
          "type": "string"
        }
      },
      "required": [
        "name"
      ]
    },
    "augmentation": {
      "type": "object",
      "description": "The augmentation suggested by the DataMart system.",
      "properties": {
        "type": {
          "type": "string",
          "enum": [
            "join",
            "union",
            "none"
          ]
        },
        "left_columns": {
          "type": "array",
          "description": "The left-side columns for the augmentation, which correspond to the supplied dataset.",
          "items": {
            "$ref": "#/definitions/augmentation_unit"
          }
        },
        "right_columns": {
          "type": "array",
          "description": "The right-side columns for the augmentation, which correspond to the DataMart dataset.",
          "items": {
            "$ref": "#/definitions/augmentation_unit"
          }
        }
      }
    }
  },
  "required": [
    "id",
    "score",
    "metadata"
  ]
}