Batch predictions
Use the endpoints described below to generate predictions for a project.
GET /api/v2/batchJobs/
Get a collection of batch jobs, optionally filtered by status.
Parameters
| Name | In | Type | Required | Description |
|---|---|---|---|---|
| offset | query | integer | true | This many results will be skipped. |
| limit | query | integer | true | At most this many results are returned. |
| status | query | any | false | Includes only jobs whose status matches this value. Repeat the parameter to filter on multiple statuses. |
| source | query | any | false | Includes only jobs whose source matches this value. Repeat the parameter to filter on multiple sources. Prefix values with a dash (-) to exclude those sources. |
| deploymentId | query | string | false | Includes only jobs for this particular deployment. |
| modelId | query | string | false | ID of the leaderboard model used by the job to process the predictions dataset. |
| jobId | query | string | false | Includes only the job with this specific ID. |
| orderBy | query | string | false | Sort order applied to the batch prediction list. Prefix the attribute name with a dash to sort in descending order, e.g. "-created". |
| allJobs | query | boolean | false | [DEPRECATED - replaced with the RBAC permission model] No effect. |
| cutoffHours | query | integer | false | Only list jobs created at most this many hours ago. |
| startDateTime | query | string(date-time) | false | ISO-formatted datetime of the earliest time the job was added (inclusive), for example "2008-08-24T12:00:00Z". If set, cutoffHours is ignored. |
| endDateTime | query | string(date-time) | false | ISO-formatted datetime of the latest time the job was added (inclusive), for example "2008-08-24T12:00:00Z". |
| batchPredictionJobDefinitionId | query | string | false | Includes only jobs for this particular job definition. |
| hostname | query | any | false | Includes only jobs for this particular prediction instance hostname. |
| batchJobType | query | any | false | Includes only jobs whose batch job type matches this value. Repeat the parameter to filter on multiple types. |
| intakeType | query | any | false | Includes only jobs with these particular intake types. |
| outputType | query | any | false | Includes only jobs with these particular output types. |
Enumerated Values
| Parameter | Value |
|---|---|
| orderBy | created, -created, status, -status |
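The sketch below shows one way to issue such a filtered listing request with Python and the `requests` library. It is only an illustration, not an official client: the API root URL, the `DATAROBOT_API_TOKEN` environment variable, and the `"-sdk"` source value are assumptions to adapt to your own installation.

```python
# Minimal sketch: list batch jobs with filters.
# Assumes the `requests` library, a DataRobot API root at app.datarobot.com,
# and an API token exported as DATAROBOT_API_TOKEN -- adjust for your setup.
import os
import requests

API_ROOT = "https://app.datarobot.com/api/v2"
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}

# Repeat a parameter to filter on several values at once; prefix a source
# value with a dash to exclude it (the "-sdk" value here is hypothetical).
params = [
    ("offset", 0),
    ("limit", 100),
    ("status", "RUNNING"),
    ("status", "COMPLETED"),
    ("source", "-sdk"),
    ("orderBy", "-created"),
]

response = requests.get(f"{API_ROOT}/batchJobs/", headers=HEADERS, params=params)
response.raise_for_status()
for job in response.json()["data"]:
    print(job["id"], job["status"], job["percentageCompleted"])
```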
Example responses
200 Response
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of jobs",
"items": {
"properties": {
"batchMonitoringJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"id": {
"description": "The ID of the Batch job",
"type": "string",
"x-versionadded": "v2.30"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.30"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.30"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object",
"x-versionadded": "v2.35"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object",
"x-versionadded": "v2.35"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.30"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.30"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.30"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.30"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.30"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object",
"x-versionadded": "v2.35"
},
"maxItems": 10000,
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object",
"x-versionadded": "v2.35"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
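Because the listing is paginated, the `next` URL in the response can be followed until it comes back as null. The helper below is a minimal sketch built on the same assumptions as the earlier example (the `requests` library, a bearer token in `DATAROBOT_API_TOKEN`, and an API root at app.datarobot.com).

```python
# Minimal pagination sketch: walk every page of GET /api/v2/batchJobs/.
import os
import requests

API_ROOT = "https://app.datarobot.com/api/v2"
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}

def iter_batch_jobs(params=None):
    """Yield every job across all pages; `next` is null on the last page."""
    url, query = f"{API_ROOT}/batchJobs/", params
    while url:
        page = requests.get(url, headers=HEADERS, params=query)
        page.raise_for_status()
        body = page.json()
        yield from body["data"]
        url = body["next"]   # absolute URL of the next page, or None
        query = None         # the next-page URL already carries the query string

for job in iter_batch_jobs([("status", "FAILED")]):
    print(job["id"], job.get("statusDetails"))
```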
POST /api/v2/batchJobs/fromJobDefinition/
Launches a one-time batch job from a previously supplied job definition, referenced by its job definition ID, and puts it on the queue.
Body parameter
{
"properties": {
"jobDefinitionId": {
"description": "ID of the Batch Prediction job definition",
"type": "string"
}
},
"required": [
"jobDefinitionId"
],
"type": "object"
}
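As a rough sketch of how this endpoint might be called (same assumptions as the earlier examples; the job definition ID below is hypothetical):

```python
# Minimal sketch: launch a one-time run from an existing job definition.
import os
import requests

API_ROOT = "https://app.datarobot.com/api/v2"
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}

payload = {"jobDefinitionId": "5dc5b1015e6e762a6241f9aa"}  # hypothetical ID
response = requests.post(
    f"{API_ROOT}/batchJobs/fromJobDefinition/", headers=HEADERS, json=payload
)
response.raise_for_status()  # a successful launch returns 202 Accepted
job = response.json()
print(job["id"], job["status"], job["links"]["self"])
```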
Parameters
Example responses
202 Response
{
"properties": {
"batchMonitoringJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"id": {
"description": "The ID of the Batch job",
"type": "string",
"x-versionadded": "v2.30"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.30"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.30"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object",
"x-versionadded": "v2.35"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object",
"x-versionadded": "v2.35"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.30"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.30"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.30"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.30"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.30"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object",
"x-versionadded": "v2.35"
}
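The schema above enumerates the available jobSpec options. As an illustrative sketch only (every ID, table name, and value below is a placeholder, not taken from this reference), a jobSpec fragment could combine a JDBC intake query that uses the timestamp template variables with a JDBC output that creates the target table on first run:

# Hypothetical jobSpec fragment (not a complete request); all IDs and names are placeholders.
job_spec_fragment = {
    "intakeSettings": {
        "type": "jdbc",
        "dataStoreId": "<data-store-id>",        # placeholder data store ID
        "credentialId": "<credential-id>",       # placeholder credential ID
        # Template variables are substituted when the job runs from a job definition.
        "query": (
            "SELECT * FROM scoring_input "
            "WHERE updated_at > '{{ last_completed_run_time }}'"
        ),
    },
    "outputSettings": {
        "type": "jdbc",
        "dataStoreId": "<data-store-id>",
        "credentialId": "<credential-id>",
        "table": "scoring_output",               # placeholder table name
        "statementType": "insert",
        "createTableIfNotExists": True,
    },
}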
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Job details for the created Batch Prediction job |
BatchJobResponse |
404 |
Not Found |
Job was deleted, never existed or you do not have access to it |
None |
422 |
Unprocessable Entity |
Could not create a Batch job. Possible reasons: {} |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
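As a usage sketch, the Python snippet below submits a job whose body follows the jobSpec schema above and then reads the returned job details. The host name, token, endpoint path (shown here as POST /api/v2/batchJobs/), and all IDs and URLs are assumptions or placeholders; adjust them to your environment.

import requests

API_HOST = "https://app.datarobot.com"      # assumption: your DataRobot host
API_TOKEN = "YOUR_API_TOKEN"                # assumption: a valid bearer token

job_spec = {
    "deploymentId": "<deployment-id>",      # placeholder deployment ID
    "intakeSettings": {
        "type": "s3",
        "url": "s3://my-bucket/scoring-data.csv",   # placeholder intake URL
        "credentialId": "<credential-id>",          # placeholder credential ID
    },
    "outputSettings": {
        "type": "s3",
        "url": "s3://my-bucket/predictions.csv",    # placeholder output URL
        "credentialId": "<credential-id>",
    },
    "csvSettings": {"delimiter": ",", "encoding": "utf-8", "quotechar": "\""},
    "maxExplanations": 3,
}

resp = requests.post(
    f"{API_HOST}/api/v2/batchJobs/",        # assumption: job-creation endpoint path
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json=job_spec,
)
resp.raise_for_status()                     # expect 202 Accepted on success (see the response table above)
job = resp.json()                           # job details as described by the schema above
print(job["id"], job["status"], job["links"]["self"])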
DELETE /api/v2/batchJobs/{batchJobId}/
If the job is running, it will first be aborted. The job is then removed: all underlying data is deleted and the job no longer appears in the list of jobs.
Parameters
Name |
In |
Type |
Required |
Description |
batchJobId |
path |
string |
true |
ID of the Batch job |
partNumber |
path |
integer |
true |
The number of the CSV part being uploaded when using multipart upload |
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Job cancelled |
None |
404 |
Not Found |
Job does not exist or was not submitted to the queue. |
None |
409 |
Conflict |
Job cannot be aborted for some reason. Possible reasons: job is already aborted or completed. |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
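A minimal sketch of calling this endpoint from Python and handling the status codes in the table above; the host, token, and job ID are placeholder assumptions.

import requests

API_HOST = "https://app.datarobot.com"      # assumption
API_TOKEN = "YOUR_API_TOKEN"                # assumption
batch_job_id = "<batch-job-id>"             # placeholder job ID

resp = requests.delete(
    f"{API_HOST}/api/v2/batchJobs/{batch_job_id}/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)

if resp.status_code == 202:
    print("Job cancelled and removed")
elif resp.status_code == 404:
    print("Job does not exist or was not submitted to the queue")
elif resp.status_code == 409:
    print("Job cannot be aborted (already aborted or completed)")
else:
    resp.raise_for_status()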
GET /api/v2/batchJobs/{batchJobId}/
Retrieve a Batch job.
Parameters
Name |
In |
Type |
Required |
Description |
batchJobId |
path |
string |
true |
ID of the Batch job |
partNumber |
path |
integer |
true |
The number of the CSV part being uploaded when using multipart upload |
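A minimal polling sketch against this endpoint, using the status and percentageCompleted fields shown in the job schema earlier in this reference; the host, token, and job ID are placeholder assumptions.

import time
import requests

API_HOST = "https://app.datarobot.com"      # assumption
API_TOKEN = "YOUR_API_TOKEN"                # assumption
batch_job_id = "<batch-job-id>"             # placeholder job ID

TERMINAL_STATUSES = {"COMPLETED", "ABORTED", "FAILED"}

while True:
    resp = requests.get(
        f"{API_HOST}/api/v2/batchJobs/{batch_job_id}/",
        headers={"Authorization": f"Bearer {API_TOKEN}"},
    )
    resp.raise_for_status()
    job = resp.json()
    print(f"{job['status']}: {job['percentageCompleted']:.0f}% complete")
    if job["status"] in TERMINAL_STATUSES:
        break
    time.sleep(10)                          # poll every 10 seconds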
Example responses
200 Response
{
"properties": {
"batchMonitoringJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"id": {
"description": "The ID of the Batch job",
"type": "string",
"x-versionadded": "v2.30"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.30"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.30"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object",
"x-versionadded": "v2.35"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object",
"x-versionadded": "v2.35"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.30"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.30"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.30"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.30"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.30"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object",
"x-versionadded": "v2.35"
}
Responses
Status |
Meaning |
Description |
Schema |
200 |
OK |
Job details for the requested Batch job |
BatchJobResponse |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
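As a hedged illustration of working with these job fields, the sketch below polls a job's links.self URL until the status reaches a terminal value, printing percentageCompleted along the way. It assumes the requests library, a Bearer token supplied via a DATAROBOT_API_TOKEN environment variable, and that job_url already holds the job's links.self URL; it is a minimal sketch, not an official client.

import os
import time

import requests

API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumption: token supplied via an environment variable
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}

def wait_for_batch_job(job_url: str, poll_seconds: float = 5.0) -> dict:
    """Poll the job's links.self URL until it reaches a terminal status."""
    terminal_statuses = {"COMPLETED", "ABORTED", "FAILED"}
    while True:
        response = requests.get(job_url, headers=HEADERS)
        response.raise_for_status()
        job = response.json()
        print(f"{job['status']}: {job['percentageCompleted']:.0f}% completed")
        if job["status"] in terminal_statuses:
            return job
        time.sleep(poll_seconds)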
PUT /api/v2/batchJobs/{batchJobId}/csvUpload/
Stream CSV data to the job. Only available for jobs that use the localFile intake option.
Parameters
Name |
In |
Type |
Required |
Description |
batchJobId |
path |
string |
true |
ID of the Batch job |
partNumber |
path |
integer |
true |
The part number of the CSV chunk being uploaded when using multipart upload |
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Job data was successfully submitted |
None |
404 |
Not Found |
Job does not exist or does not require data |
None |
409 |
Conflict |
Dataset upload has already begun |
None |
415 |
Unsupported Media Type |
Not acceptable MIME type |
None |
422 |
Unprocessable Entity |
Job was "ABORTED" due to too many errors in the data |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
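A minimal sketch of using this endpoint, assuming the requests library, a Bearer token in a DATAROBOT_API_TOKEN environment variable, that csv_upload_url holds the job's links.csvUpload URL, and that text/csv is an accepted MIME type (other content types may be rejected with 415):

import os

import requests

API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumption: token supplied via an environment variable

def upload_scoring_data(csv_upload_url: str, csv_path: str) -> None:
    """Stream a local CSV file to the job's csvUpload URL (localFile intake only)."""
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "text/csv",  # assumption: CSV MIME type; unsupported types return 415
    }
    with open(csv_path, "rb") as csv_file:
        response = requests.put(csv_upload_url, headers=headers, data=csv_file)
    response.raise_for_status()  # expect 202 Accepted when the data is submitted

On a slow connection, combining this with intakeSettings.async set to false keeps the job out of the queue until the upload has finished.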
GET /api/v2/batchJobs/{batchJobId}/download/
This is only valid for jobs scored using the "localFile" output option
Parameters
Name |
In |
Type |
Required |
Description |
batchJobId |
path |
string |
true |
ID of the Batch job |
partNumber |
path |
integer |
true |
The part number of the CSV chunk when using multipart upload |
Responses
Status |
Meaning |
Description |
Schema |
200 |
OK |
Job was downloaded correctly |
None |
404 |
Not Found |
Job does not exist or is not completed |
None |
406 |
Not Acceptable |
Not acceptable MIME type |
None |
422 |
Unprocessable Entity |
Job was "ABORTED" due to too many errors in the data |
None |
Status |
Header |
Type |
Format |
Description |
200 |
Content-Disposition |
string |
|
Contains an auto-generated filename for this download ("attachment;filename=result-.csv"). |
200 |
Content-Type |
string |
|
MIME type of the returned data |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
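A minimal sketch of fetching the results, assuming the requests library, a Bearer token in a DATAROBOT_API_TOKEN environment variable, and that download_url holds the job's links.download URL (which is null until the results are ready):

import os

import requests

API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumption: token supplied via an environment variable

def download_results(download_url: str, destination_path: str) -> None:
    """Stream scored results (localFile output only) to a local file."""
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    with requests.get(download_url, headers=headers, stream=True) as response:
        response.raise_for_status()  # 404 if the job does not exist or is not completed yet
        with open(destination_path, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)

The Content-Disposition header of the 200 response carries the auto-generated filename if you prefer to reuse it.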
GET /api/v2/batchPredictionJobDefinitions/
List all Batch Prediction jobs definitions available
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
true |
This many results will be skipped |
limit |
query |
integer |
true |
At most this many results are returned |
searchName |
query |
string |
false |
Search definitions by this human-readable name; names are unique across organisations. |
deploymentId |
query |
string |
false |
Includes only definitions for this particular deployment |
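A minimal sketch of paging through this endpoint, assuming the requests library, a Bearer token in a DATAROBOT_API_TOKEN environment variable, and https://app.datarobot.com/api/v2 as the base URL (adjust for your installation):

import os
from typing import List, Optional

import requests

API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumption: token supplied via an environment variable
BASE_URL = "https://app.datarobot.com/api/v2"  # assumption: replace with your installation's URL

def list_job_definitions(deployment_id: Optional[str] = None, limit: int = 100) -> List[dict]:
    """Collect all Batch Prediction job definitions, optionally filtered by deployment."""
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    definitions, offset = [], 0
    while True:
        params = {"offset": offset, "limit": limit}
        if deployment_id is not None:
            params["deploymentId"] = deployment_id
        response = requests.get(f"{BASE_URL}/batchPredictionJobDefinitions/", headers=headers, params=params)
        response.raise_for_status()
        page = response.json()["data"]
        definitions.extend(page)
        if len(page) < limit:  # a short page means there is nothing left to fetch
            return definitions
        offset += limit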
Example responses
200 Response
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of scheduled jobs",
"items": {
"properties": {
"batchPredictionJob": {
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"enabled": {
"default": false,
"description": "If this job definition is enabled as a scheduled job.",
"type": "boolean"
},
"id": {
"description": "The ID of the Batch job definition",
"type": "string"
},
"lastFailedRunTime": {
"description": "Last time this job had a failed run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastScheduledRunTime": {
"description": "Last time this job was scheduled to run (though not guaranteed it actually ran at that time)",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastStartedJobStatus": {
"description": "The status of the latest job launched to the queue (if any).",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": [
"string",
"null"
]
},
"lastStartedJobTime": {
"description": "The last time (if any) a job was launched.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRunTime": {
"description": "Last time this job had a successful run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
},
"nextScheduledRunTime": {
"description": "Next time this job is scheduled to run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"updated": {
"description": "When was this job last updated",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
},
"required": [
"batchPredictionJob",
"created",
"createdBy",
"enabled",
"id",
"lastStartedJobStatus",
"lastStartedJobTime",
"name",
"updated",
"updatedBy"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
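As a minimal usage sketch (the base URL and token below are placeholder assumptions, not values from this reference), the following Python snippet authenticates with a bearer token, lists batch jobs, and walks the paginated response by following next until it is null:
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumption: adjust to your installation
API_TOKEN = "YOUR_API_TOKEN"                   # assumption: supply a valid API token

def iter_batch_jobs(limit=100):
    """Yield every batch job, following the paginated `next` links until exhausted."""
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    url = f"{API_BASE}/batchJobs/"
    params = {"offset": 0, "limit": limit}
    while url:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        payload = response.json()
        yield from payload["data"]
        url = payload["next"]  # null (None) when there is no next page
        params = None          # subsequent page URLs already carry offset/limit

for job in iter_batch_jobs():
    print(job["id"], job["name"])
Additional query parameters can be added to params in the same way; the loop relies only on the data and next fields shown in the schema above.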
POST /api/v2/batchPredictionJobDefinitions/
Create a Batch Prediction Job definition. A configuration for a Batch Prediction job which can be executed either manually upon request or at scheduled intervals, if enabled. The API payload is the same as for /batchPredictions, along with optional enabled and schedule items.
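As an illustrative sketch rather than a complete payload (the base URL, token, deployment ID, and dataset ID are placeholder assumptions), the following Python snippet creates a minimal definition that scores an AI Catalog dataset to local file output on a daily schedule, using only a small subset of the fields documented in the body parameter below:
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumption: adjust to your installation
API_TOKEN = "YOUR_API_TOKEN"                   # assumption: supply a valid API token

definition = {
    "name": "Nightly scoring",                   # must be unique across the organisation
    "enabled": True,                             # run automatically on the schedule below
    "deploymentId": "0123456789abcdef01234567",  # placeholder deployment ID
    "intakeSettings": {
        "type": "dataset",
        "datasetId": "fedcba9876543210fedcba98",  # placeholder AI Catalog dataset ID
    },
    "outputSettings": {"type": "localFile"},
    "numConcurrent": 4,
    "schedule": {                                # every day at 02:15
        "minute": [15],
        "hour": [2],
        "dayOfMonth": ["*"],
        "dayOfWeek": ["*"],
        "month": ["*"],
    },
}

response = requests.post(
    f"{API_BASE}/batchPredictionJobDefinitions/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json=definition,
)
response.raise_for_status()
print(response.json().get("id"))  # ID of the newly created definition
If enabled and schedule are omitted, the definition is not scheduled and can still be executed manually upon request.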
Body parameter
{
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"enabled": {
"description": "If this job definition is enabled as a scheduled job. Optional if no schedule is supplied.",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The intake option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations, if left out the backend will generate one for you.",
"maxLength": 100,
"minLength": 1,
"type": "string"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The output option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"deploymentId",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"skipDriftTracking"
],
"type": "object"
}
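To make the schema above easier to scan, the sketch below assembles a minimal job specification in Python from fields documented above (deploymentId, abortOnError, intakeSettings, outputSettings, schedule). It is illustrative only: the deployment ID, S3 URLs, credential ID, and schedule values are hypothetical placeholders, and fields with documented defaults (csvSettings, maxExplanations, and so on) are omitted here on the assumption that they fall back to those defaults.

# Hypothetical example payload; field names follow the schema above,
# all IDs and URLs are placeholders.
job_definition = {
    "deploymentId": "<deployment-id>",          # deployment used for scoring
    "abortOnError": True,                       # stop the job if too many errors occur
    "intakeSettings": {
        "type": "s3",
        "url": "s3://example-bucket/input/scoring.csv",    # placeholder input location
        "credentialId": "<credential-id>",
        "format": "csv",
    },
    "outputSettings": {
        "type": "s3",
        "url": "s3://example-bucket/output/scored.csv",    # placeholder output location
        "credentialId": "<credential-id>",
        "format": "csv",
    },
    # Each schedule key takes a list; "*" means "every". This example runs daily at 02:00.
    "schedule": {
        "minute": [0],
        "hour": [2],
        "dayOfMonth": ["*"],
        "dayOfWeek": ["*"],
        "month": ["*"],
    },
}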
Parameters
Example responses
202 Response
{
"properties": {
"batchPredictionJob": {
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"enabled": {
"default": false,
"description": "If this job definition is enabled as a scheduled job.",
"type": "boolean"
},
"id": {
"description": "The ID of the Batch job definition",
"type": "string"
},
"lastFailedRunTime": {
"description": "Last time this job had a failed run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastScheduledRunTime": {
"description": "Last time this job was scheduled to run (though not guaranteed it actually ran at that time)",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastStartedJobStatus": {
"description": "The status of the latest job launched to the queue (if any).",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": [
"string",
"null"
]
},
"lastStartedJobTime": {
"description": "The last time (if any) a job was launched.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRunTime": {
"description": "Last time this job had a successful run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
},
"nextScheduledRunTime": {
"description": "Next time this job is scheduled to run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"updated": {
"description": "When was this job last updated",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
},
"required": [
"batchPredictionJob",
"created",
"createdBy",
"enabled",
"id",
"lastStartedJobStatus",
"lastStartedJobTime",
"name",
"updated",
"updatedBy"
],
"type": "object"
}
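The schedule object in the response above uses cron-like arrays: "*" matches every value, and dayOfMonth and dayOfWeek are additive. As a minimal, purely illustrative sketch in Python, the following dictionary describes a definition that runs at 15:30 on the 1st of every month and additionally every Monday:

# Hypothetical schedule payload matching the schema above.
# minute/hour pin the time of day; dayOfMonth and dayOfWeek are additive.
schedule = {
    "minute": [30],
    "hour": [15],
    "dayOfMonth": [1],
    "dayOfWeek": ["mon"],
    "month": ["*"],
}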
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Job details for the created Batch Prediction job definition |
BatchPredictionJobDefinitionsResponse |
403 |
Forbidden |
You are not authorized to create a job definition on this deployment due to your permissions role |
None |
422 |
Unprocessable Entity |
You tried to create a job definition with incompatible or missing parameters for a fully functioning job definition |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
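A hedged sketch of creating a definition with Python's requests library follows; the collection path, host, token, and IDs shown are assumptions (the body mirrors the flat job-definition fields documented for PATCH below), and a 422 response indicates incompatible or missing parameters:

import requests

BASE_URL = "https://app.datarobot.com"   # assumption: your DataRobot host
API_TOKEN = "..."                        # assumption: a valid API token

payload = {
    "name": "Nightly scoring",                 # assumption: a unique definition name
    "enabled": False,                          # no schedule supplied in this sketch
    "deploymentId": "5dc5b...",                # assumption: an existing deployment ID
    "intakeSettings": {"type": "localFile"},
}
resp = requests.post(
    f"{BASE_URL}/api/v2/batchPredictionJobDefinitions/",   # assumed collection path
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json=payload,
)
if resp.status_code == 202:
    print("Created definition:", resp.json()["id"])
else:
    print(resp.status_code, resp.text)   # 403 or 422 per the table above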
DELETE /api/v2/batchPredictionJobDefinitions/{jobDefinitionId}/
Delete a Batch Prediction job definition
Parameters
Name |
In |
Type |
Required |
Description |
jobDefinitionId |
path |
string |
true |
ID of the Batch Prediction job definition |
Responses
Status |
Meaning |
Description |
Schema |
204 |
No Content |
none |
None |
403 |
Forbidden |
You are not authorized to delete this job definition due to your permissions role |
None |
404 |
Not Found |
Job was deleted, never existed or you do not have access to it |
None |
409 |
Conflict |
Job could not be deleted, as there are currently running jobs in the queue. |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
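A minimal sketch of calling this endpoint with Python's requests library (host, token, and ID are placeholders you supply); the 409 branch corresponds to the conflict case in the table above:

import requests

BASE_URL = "https://app.datarobot.com"   # assumption: your DataRobot host
API_TOKEN = "..."                        # assumption: a valid API token
job_definition_id = "..."                # ID of the Batch Prediction job definition

resp = requests.delete(
    f"{BASE_URL}/api/v2/batchPredictionJobDefinitions/{job_definition_id}/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
if resp.status_code == 204:
    print("Job definition deleted")
elif resp.status_code == 409:
    print("Cannot delete: jobs from this definition are still running in the queue")
else:
    resp.raise_for_status()   # 403/404 and other errors surface here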
GET /api/v2/batchPredictionJobDefinitions/{jobDefinitionId}/
Retrieve a Batch Prediction job definition
Parameters
Name |
In |
Type |
Required |
Description |
jobDefinitionId |
path |
string |
true |
ID of the Batch Prediction job definition |
Example responses
200 Response
{
"properties": {
"batchPredictionJob": {
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"enabled": {
"default": false,
"description": "If this job definition is enabled as a scheduled job.",
"type": "boolean"
},
"id": {
"description": "The ID of the Batch job definition",
"type": "string"
},
"lastFailedRunTime": {
"description": "Last time this job had a failed run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastScheduledRunTime": {
"description": "Last time this job was scheduled to run (though not guaranteed it actually ran at that time)",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastStartedJobStatus": {
"description": "The status of the latest job launched to the queue (if any).",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": [
"string",
"null"
]
},
"lastStartedJobTime": {
"description": "The last time (if any) a job was launched.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRunTime": {
"description": "Last time this job had a successful run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
},
"nextScheduledRunTime": {
"description": "Next time this job is scheduled to run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"updated": {
"description": "When was this job last updated",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
},
"required": [
"batchPredictionJob",
"created",
"createdBy",
"enabled",
"id",
"lastStartedJobStatus",
"lastStartedJobTime",
"name",
"updated",
"updatedBy"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
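As a hedged companion to the schema above, this Python sketch retrieves a definition and reads a few of the documented fields (host, token, and ID are placeholders you supply):

import requests

BASE_URL = "https://app.datarobot.com"   # assumption: your DataRobot host
API_TOKEN = "..."                        # assumption: a valid API token
job_definition_id = "..."                # ID of the Batch Prediction job definition

resp = requests.get(
    f"{BASE_URL}/api/v2/batchPredictionJobDefinitions/{job_definition_id}/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
resp.raise_for_status()
definition = resp.json()

# Fields documented in the 200 response schema above.
print(definition["name"], "enabled:", definition["enabled"])
print("Last started job status:", definition.get("lastStartedJobStatus"))
print("Next scheduled run:", definition.get("nextScheduledRunTime"))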
PATCH /api/v2/batchPredictionJobDefinitions/{jobDefinitionId}/
Update a Batch Prediction job definition
Body parameter
{
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"enabled": {
"description": "If this job definition is enabled as a scheduled job. Optional if no schedule is supplied.",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The intake option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations, if left out the backend will generate one for you.",
"maxLength": 100,
"minLength": 1,
"type": "string"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The output option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"type": "object"
}
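To make the body schema above concrete, here is a hedged Python sketch of a partial update: it renames the definition, keeps it enabled, schedules a daily 06:00 run, reads from S3 and writes back over JDBC. Only the field names and value shapes come from the schema; the requests library, the DATAROBOT_API_TOKEN environment variable, the host, and every ID, URL and table name below are assumptions or placeholders.
# Hypothetical sketch: update a Batch Prediction job definition via
# PATCH /api/v2/batchPredictionJobDefinitions/{jobDefinitionId}/.
# All IDs, hosts, URLs and table names are placeholders.
import os
import requests
BASE_URL = "https://app.datarobot.com"  # assumption
TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumption
JOB_DEFINITION_ID = "hypothetical-definition-id"  # placeholder
# A small body drawn from the schema above: rename the definition, keep it
# enabled, run daily at 06:00, read from S3 and write back over JDBC.
body = {
    "name": "nightly-scoring",
    "enabled": True,
    "schedule": {
        "minute": [0],
        "hour": [6],
        "dayOfMonth": ["*"],
        "dayOfWeek": ["*"],
        "month": ["*"],
    },
    "intakeSettings": {
        "type": "s3",
        "url": "s3://example-bucket/scoring/input.csv",  # placeholder
        "credentialId": "example-intake-credential-id",  # placeholder
    },
    "outputSettings": {
        "type": "jdbc",
        "dataStoreId": "example-data-store-id",  # placeholder
        "table": "scored_rows",  # placeholder
        "statementType": "insert",
        "createTableIfNotExists": True,
    },
}
response = requests.patch(
    f"{BASE_URL}/api/v2/batchPredictionJobDefinitions/{JOB_DEFINITION_ID}/",
    json=body,
    headers={"Authorization": f"Bearer {TOKEN}"},
)
response.raise_for_status()
print(response.status_code)  # the 200 response body is described below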
Parameters
Example responses
200 Response
{
"properties": {
"batchPredictionJob": {
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"enabled": {
"default": false,
"description": "If this job definition is enabled as a scheduled job.",
"type": "boolean"
},
"id": {
"description": "The ID of the Batch job definition",
"type": "string"
},
"lastFailedRunTime": {
"description": "Last time this job had a failed run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastScheduledRunTime": {
"description": "Last time this job was scheduled to run (though not guaranteed it actually ran at that time)",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastStartedJobStatus": {
"description": "The status of the latest job launched to the queue (if any).",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": [
"string",
"null"
]
},
"lastStartedJobTime": {
"description": "The last time (if any) a job was launched.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRunTime": {
"description": "Last time this job had a successful run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
},
"nextScheduledRunTime": {
"description": "Next time this job is scheduled to run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"updated": {
"description": "When was this job last updated",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
},
"required": [
"batchPredictionJob",
"created",
"createdBy",
"enabled",
"id",
"lastStartedJobStatus",
"lastStartedJobTime",
"name",
"updated",
"updatedBy"
],
"type": "object"
}
Responses
| Status | Meaning | Description | Schema |
| --- | --- | --- | --- |
| 200 | OK | Job details for the updated Batch Prediction job definition | BatchPredictionJobDefinitionsResponse |
| 403 | Forbidden | You are not authorized to alter the contents of this job definition due to your permissions role | None |
| 404 | Not Found | Job was deleted, never existed or you do not have access to it | None |
| 409 | Conflict | A job definition with this name already exists within your organization | None |
| 422 | Unprocessable Entity | Could not update the job definition. Possible reasons: {} | None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
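For reference, the sketch below shows how a `schedule` object matching the schema above might be assembled and sent when updating a job definition. Only the `schedule` field names and allowed values come from this reference; the host, token, job definition ID, and the PATCH path used here are illustrative assumptions, and the update payload shape is assumed to mirror the response fields.

```python
# Hedged sketch: enable a job definition on a daily 01:30 schedule.
# Host, token, ID, and the PATCH path are assumptions for illustration.
import requests

API_HOST = "https://app.datarobot.com"          # assumed host
API_TOKEN = "YOUR_API_TOKEN"                    # assumed credential
JOB_DEFINITION_ID = "5e4bc5b35e6e763beb488dba"  # hypothetical ID

# Field names and allowed values follow the schedule schema above;
# dayOfMonth, dayOfWeek, and month accept ["*"] or explicit values.
schedule = {
    "minute": [30],
    "hour": [1],
    "dayOfMonth": ["*"],
    "dayOfWeek": ["*"],
    "month": ["*"],
}

resp = requests.patch(
    f"{API_HOST}/api/v2/batchPredictionJobDefinitions/{JOB_DEFINITION_ID}/",  # assumed path
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json={"enabled": True, "schedule": schedule},
)
resp.raise_for_status()
print(resp.json().get("nextScheduledRunTime"))  # field documented in the schema above
```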
GET /api/v2/batchPredictionJobDefinitions/{jobDefinitionId}/portable/
Retrieve a Batch Prediction job definition for Portable Batch Predictions
Parameters
| Name | In | Type | Required | Description |
| --- | --- | --- | --- | --- |
| jobDefinitionId | path | string | true | ID of the Batch Prediction job definition |
Responses
| Status | Meaning | Description | Schema |
| --- | --- | --- | --- |
| 200 | OK | Snippet for Portable Batch Predictions | None |
| 404 | Not Found | Job was deleted, never existed or you do not have access to it | None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
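A minimal sketch of retrieving the portable snippet with Python's `requests` is shown below. The endpoint path comes from this reference; the host, token, and job definition ID are placeholders, and the response body is treated as opaque text since no schema is documented for it.

```python
# Hedged sketch: fetch the Portable Batch Predictions snippet for a
# job definition. Host, token, and ID are illustrative placeholders.
import requests

API_HOST = "https://app.datarobot.com"          # assumed host
API_TOKEN = "YOUR_API_TOKEN"                    # assumed credential
JOB_DEFINITION_ID = "5e4bc5b35e6e763beb488dba"  # hypothetical ID

resp = requests.get(
    f"{API_HOST}/api/v2/batchPredictionJobDefinitions/{JOB_DEFINITION_ID}/portable/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
resp.raise_for_status()
print(resp.text)  # the snippet itself; the reference documents the schema as "None"
```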
GET /api/v2/batchPredictions/
Get a collection of batch prediction jobs by statuses
Parameters
| Name | In | Type | Required | Description |
| --- | --- | --- | --- | --- |
| offset | query | integer | true | This many results will be skipped |
| limit | query | integer | true | At most this many results are returned |
| status | query | any | false | Includes only jobs that have the status value that matches this flag. Repeat the parameter for filtering on multiple statuses. |
| source | query | any | false | Includes only jobs that have the source value that matches this flag. Repeat the parameter for filtering on multiple sources. Prefix values with a dash (-) to exclude those sources. |
| deploymentId | query | string | false | Includes only jobs for this particular deployment |
| modelId | query | string | false | ID of leaderboard model which is used in job for processing predictions dataset |
| jobId | query | string | false | Includes only job by specific id |
| orderBy | query | string | false | Sort order which will be applied to batch prediction list. Prefix the attribute name with a dash to sort in descending order, e.g. "-created". |
| allJobs | query | boolean | false | [DEPRECATED - replaced with RBAC permission model] - No effect |
| cutoffHours | query | integer | false | Only list jobs created at most this many hours ago. |
| startDateTime | query | string(date-time) | false | ISO-formatted datetime of the earliest time the job was added (inclusive). For example "2008-08-24T12:00:00Z". Will ignore cutoffHours if set. |
| endDateTime | query | string(date-time) | false | ISO-formatted datetime of the latest time the job was added (inclusive). For example "2008-08-24T12:00:00Z". |
| batchPredictionJobDefinitionId | query | string | false | Includes only jobs for this particular definition |
| hostname | query | any | false | Includes only jobs for this particular prediction instance hostname |
| intakeType | query | any | false | Includes only jobs with these particular intake types |
| outputType | query | any | false | Includes only jobs with these particular output types |
Enumerated Values
| Parameter | Value |
| --- | --- |
| orderBy | created, -created, status, -status |
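To make the parameter table above concrete, here is a hedged sketch of listing jobs with a few of those filters. Passing a list for `status` makes `requests` repeat the query parameter, as the reference asks; the host, token, and the specific status strings are assumptions (the strings are borrowed from the lastStartedJobStatus enum documented earlier), while the `data`, `id`, and `created` fields come from the response schema below.

```python
# Hedged sketch: list the 20 most recently created batch prediction
# jobs that are RUNNING or COMPLETED. Host/token are placeholders.
import requests

API_HOST = "https://app.datarobot.com"  # assumed host
API_TOKEN = "YOUR_API_TOKEN"            # assumed credential

params = {
    "offset": 0,
    "limit": 20,
    "status": ["RUNNING", "COMPLETED"],  # repeated query parameter; values assumed
    "orderBy": "-created",               # newest first
}
resp = requests.get(
    f"{API_HOST}/api/v2/batchPredictions/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    params=params,
)
resp.raise_for_status()
for job in resp.json()["data"]:          # "data" is documented in the schema below
    print(job["id"], job["created"])
```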
Example responses
200 Response
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of jobs",
"items": {
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
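For illustration, the sketch below lists batch jobs with bearer authentication and walks the paginated collection using the data and next fields described in the response schema above. It is a minimal sketch, assuming a token in a DATAROBOT_API_TOKEN environment variable and the public base URL; both are assumptions for the example, not part of this reference.
# A minimal sketch, assuming a bearer token in the DATAROBOT_API_TOKEN
# environment variable and an assumed base URL.
import os

import requests

API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumed variable name
BASE_URL = "https://app.datarobot.com"  # assumed base URL

def list_batch_jobs():
    """Yield every batch job by following the paginated next links."""
    url = f"{BASE_URL}/api/v2/batchJobs/"
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    while url is not None:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        page = response.json()
        yield from page["data"]  # the jobs on this page
        url = page["next"]  # null (None) on the last page

for job in list_batch_jobs():
    print(job["id"], job["status"])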
POST /api/v2/batchPredictions/
Submit the configuration for the job; it will then be added to the queue
Body parameter
{
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The intake option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The output option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"skipDriftTracking"
],
"type": "object"
}
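As a quick illustration of the body parameter above, the sketch below submits a minimal job against a deployment using localFile intake and output; any other field from the schema can be added to the job specification in the same way. The base URL, token handling, and the placeholder deployment ID are assumptions for the example, not part of this reference.
# A minimal sketch, assuming the same token and base-URL conventions as
# the listing example earlier in this document.
import os

import requests

API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumed variable name
BASE_URL = "https://app.datarobot.com"  # assumed base URL

job_spec = {
    "deploymentId": "YOUR_DEPLOYMENT_ID",  # placeholder value
    "intakeSettings": {"type": "localFile"},
    "outputSettings": {"type": "localFile"},
    "passthroughColumnsSet": "all",
    "includePredictionStatus": True,
}

response = requests.post(
    f"{BASE_URL}/api/v2/batchPredictions/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json=job_spec,
)
response.raise_for_status()
job = response.json()

# With localFile intake, the dataset is uploaded separately to the
# links.csvUpload URL returned for the job.
print(job["id"], job.get("links", {}).get("csvUpload"))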
Parameters
Example responses
202 Response
{
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
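As a hedged illustration of the BearerAuth scheme, the sketch below attaches an Authorization: Bearer header to every request through a requests.Session. The host, token environment variable, and job ID are assumptions, not values from this reference.
# Minimal sketch of BearerAuth: every call carries "Authorization: Bearer <token>".
# Host, token env var, and the example job ID are placeholders.
import os

import requests

session = requests.Session()
session.headers.update(
    {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
)

# Hypothetical example: poll a job's self link (see links.self in the schema above).
job_url = "https://app.datarobot.com/api/v2/batchPredictions/5dc5b1015e6e762a6241f9aa/"
job = session.get(job_url).json()
print(job.get("status"), job.get("percentageCompleted"))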
POST /api/v2/batchPredictions/fromExisting/
Copies an existing job and submits it to the queue.
Body parameter
{
"properties": {
"partNumber": {
"default": 0,
"description": "The number of which csv part is being uploaded when using multipart upload ",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.27"
},
"predictionJobId": {
"description": "ID of the Batch Prediction job",
"type": "string"
}
},
"required": [
"partNumber",
"predictionJobId"
],
"type": "object"
}
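A hedged sketch of copying an existing job through this endpoint follows; the body fields match the schema above, while the host, token source, and job ID are placeholders.
# Sketch: copy an existing Batch Prediction job and submit the copy to the queue.
# Only predictionJobId and partNumber come from the schema above; the host,
# token env var, and the example job ID are assumptions.
import os

import requests

response = requests.post(
    "https://app.datarobot.com/api/v2/batchPredictions/fromExisting/",  # assumed host
    headers={"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"},
    json={
        "predictionJobId": "5dc5b1015e6e762a6241f9aa",  # ID of the job to copy
        "partNumber": 0,
    },
)
response.raise_for_status()  # a 202 response carries the newly created job
new_job = response.json()
print(new_job.get("id"), new_job.get("status"))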
Parameters
Example responses
202 Response
{
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
}
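The intakeSettings and outputSettings shown in the jobSpec above each take exactly one of the listed variants. As a non-authoritative illustration, the Python sketch below assembles minimal settings for the JDBC intake variant (which requires dataStoreId and type) and the S3 output variant (which requires type and url); every ID, table name, and URL is a hypothetical placeholder.
# Minimal sketch of intake/output settings matching the schema variants above.
# All IDs, names, and URLs below are hypothetical placeholders.
jdbc_intake = {
    "type": "jdbc",                       # required
    "dataStoreId": "EXAMPLE_DATASTORE_ID",  # required: ID of the data store to connect to
    "schema": "analytics",                # optional: database schema to read input data from
    "table": "scoring_input",             # optional: database table to read input data from
}

s3_output = {
    "type": "s3",                         # required
    "url": "s3://example-bucket/results/",  # required: URL for the file or directory
    "credentialId": "EXAMPLE_CREDENTIAL_ID",  # optional: credential with write access
    "format": "csv",                      # optional: "csv" (default) or "parquet"
}

job_config = {
    "intakeSettings": jdbc_intake,
    "outputSettings": s3_output,
    "maxExplanations": 3,                 # optional: 0-100 explanations, ordered by strength
}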
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
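As a non-authoritative illustration of consuming the job object described above, the sketch below polls a job through its links.self URL with bearer authentication, reports percentageCompleted, and, for localFile output, downloads the results from links.download once a terminal status is reached. The host, token, and job URL are placeholder assumptions.
import time
import requests

API_TOKEN = "YOUR_API_TOKEN"  # placeholder bearer token
JOB_SELF_URL = "https://app.example.com/api/v2/batchJobs/JOB_ID/"  # placeholder links.self URL
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}

TERMINAL_STATUSES = {"COMPLETED", "ABORTED", "FAILED"}

while True:
    job = requests.get(JOB_SELF_URL, headers=HEADERS).json()
    status = job["status"]
    print(f"{status}: {job.get('percentageCompleted', 0):.0f}% complete")
    if status in TERMINAL_STATUSES:
        break
    time.sleep(10)

# links.download is only populated for localFile output, once results are ready.
download_url = job["links"].get("download")
if status == "COMPLETED" and download_url:
    with open("predictions.csv", "wb") as fh:
        fh.write(requests.get(download_url, headers=HEADERS).content)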
POST /api/v2/batchPredictions/fromJobDefinition/
Launches a one-time batch prediction job based on the previously supplied job definition, referenced by its job definition ID, and puts it on the queue.
Body parameter
{
"properties": {
"jobDefinitionId": {
"description": "ID of the Batch Prediction job definition",
"type": "string"
}
},
"required": [
"jobDefinitionId"
],
"type": "object"
}
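A minimal sketch of launching a run from an existing definition, assuming a placeholder host, bearer token, and job definition ID; on success the endpoint responds with 202 and the job object shown below.
import requests

BASE_URL = "https://app.example.com/api/v2"  # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}  # BearerAuth

response = requests.post(
    f"{BASE_URL}/batchPredictions/fromJobDefinition/",
    headers=HEADERS,
    json={"jobDefinitionId": "EXAMPLE_DEFINITION_ID"},  # required body field (placeholder ID)
)
response.raise_for_status()      # expect 202 Accepted
job = response.json()
print(job["id"], job["status"])  # e.g. INITIALIZING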
Parameters
Example responses
202 Response
{
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
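As an illustration of how the job objects described above can be consumed, the following Python sketch lists batch jobs and prints their progress using the documented fields (`status`, `percentageCompleted`, `scoredRows`, `failedRows`). It is not part of the API reference: the host URL, the token environment variable, and the use of the `requests` library are assumptions.
# Illustrative sketch only; host, token variable, and filter values are assumptions.
import os
import requests

API_HOST = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumed environment variable

response = requests.get(
    f"{API_HOST}/batchJobs/",
    headers={"Authorization": f"Bearer {TOKEN}"},
    params={"offset": 0, "limit": 50, "status": "RUNNING"},
)
response.raise_for_status()

# Each item follows the job schema shown above.
for job in response.json().get("data", []):
    print(
        f"{job['id']}: {job['status']} "
        f"({job.get('percentageCompleted', 0):.0f}% complete, "
        f"{job.get('scoredRows', 0)} scored, {job.get('failedRows', 0)} failed)"
    )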
DELETE /api/v2/batchPredictions/{predictionJobId}/
If the job is running, it will be aborted. The job is then removed: all underlying data is deleted and the job no longer appears in the list of jobs.
Parameters
Name |
In |
Type |
Required |
Description |
predictionJobId |
path |
string |
true |
ID of the Batch Prediction job |
partNumber |
path |
integer |
true |
The number of the csv part being uploaded when using multipart upload |
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Job cancelled |
None |
404 |
Not Found |
Job does not exist or was not submitted to the queue. |
None |
409 |
Conflict |
Job cannot be aborted; for example, it has already been aborted or completed. |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
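For reference, here is a minimal Python sketch of this delete call, mapping the 202/404/409 responses listed above to messages. The host URL and token environment variable are assumptions, not part of the API reference.
import os
import requests

API_HOST = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumed environment variable

def delete_batch_prediction_job(prediction_job_id: str) -> None:
    # DELETE /api/v2/batchPredictions/{predictionJobId}/ aborts a running job and removes it.
    resp = requests.delete(
        f"{API_HOST}/batchPredictions/{prediction_job_id}/",
        headers={"Authorization": f"Bearer {TOKEN}"},
    )
    if resp.status_code == 202:
        print("Job cancelled and removed.")
    elif resp.status_code == 404:
        print("Job does not exist or was not submitted to the queue.")
    elif resp.status_code == 409:
        print("Job cannot be aborted (already aborted or completed).")
    else:
        resp.raise_for_status()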
GET /api/v2/batchPredictions/{predictionJobId}/
Retrieve a Batch Prediction job.
Parameters
Name |
In |
Type |
Required |
Description |
predictionJobId |
path |
string |
true |
ID of the Batch Prediction job |
partNumber |
path |
integer |
true |
The number of the csv part being uploaded when using multipart upload |
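Below is a minimal Python sketch of polling this endpoint until the job reaches a terminal status (COMPLETED, ABORTED, or FAILED) and then downloading results via `links.download`, which is only populated for localFile output. The host URL, token environment variable, placeholder job ID, and polling interval are assumptions.
import os
import time
import requests

API_HOST = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
TOKEN = os.environ["DATAROBOT_API_TOKEN"]  # assumed environment variable
HEADERS = {"Authorization": f"Bearer {TOKEN}"}

def wait_for_batch_prediction_job(prediction_job_id: str, poll_seconds: int = 15) -> dict:
    # Poll GET /api/v2/batchPredictions/{predictionJobId}/ until a terminal status is reached.
    url = f"{API_HOST}/batchPredictions/{prediction_job_id}/"
    while True:
        job = requests.get(url, headers=HEADERS).json()
        if job["status"] in ("COMPLETED", "ABORTED", "FAILED"):
            return job
        time.sleep(poll_seconds)

job = wait_for_batch_prediction_job("5dc5b1015e6e762a6241f9aa")  # placeholder job ID
download_url = (job.get("links") or {}).get("download")
if job["status"] == "COMPLETED" and download_url:
    # links.download is null until results are available.
    with requests.get(download_url, headers=HEADERS, stream=True) as r:
        r.raise_for_status()
        with open("predictions.csv", "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)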
Example responses
200 Response
{
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
PATCH /api/v2/batchPredictions/{predictionJobId}/
Once a job has finished execution, regardless of the result, its parameters can be updated, for example to allow better filtering of the job list upon retrieval or to update the job's scoring status externally.
Body parameter
{
"properties": {
"aborted": {
"description": "Time when job abortion happened",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.26"
},
"completed": {
"description": "Time when job completed scoring",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.26"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"type": "integer",
"x-versionadded": "v2.26"
},
"hidden": {
"description": "Hides or unhides the job from the job list",
"type": "boolean",
"x-versionadded": "v2.24"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.26"
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.26"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.26"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"type": "integer",
"x-versionadded": "v2.26"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"type": "integer",
"x-versionadded": "v2.26"
},
"started": {
"description": "Time when job scoring begin",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.26"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string",
"x-versionadded": "v2.26"
}
},
"type": "object"
}
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
predictionJobId | path | string | true | ID of the Batch Prediction job
body | body | BatchPredictionJobUpdate | false | none
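As an illustration only, the following is a minimal sketch of such an update using Python's `requests` library. The base URL, bearer token, job ID, and field values are placeholder assumptions, not values taken from this reference.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL; adjust for your installation
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
JOB_ID = "YOUR_PREDICTION_JOB_ID"              # placeholder Batch Prediction job ID

# Externally mark a finished job as COMPLETED and record row counts.
payload = {
    "status": "COMPLETED",
    "scoredRows": 1000,
    "failedRows": 0,
}

resp = requests.patch(
    f"{API_BASE}/batchPredictions/{JOB_ID}/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json=payload,
)
resp.raise_for_status()  # a 200 response returns the updated job
print(resp.json()["status"])
```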
Example responses
200 Response
{
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
}
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
200 | OK | Job updated | BatchPredictionJobResponse
404 | Not Found | Job does not exist or was not submitted to the queue. | None
409 | Conflict | Job cannot be hidden for some reason. Possible reasons: job is not in a deletable state. | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
PUT /api/v2/batchPredictions/{predictionJobId}/csvUpload/
Stream CSV data to the prediction job. Only available for jobs that use the localFile intake option.
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
predictionJobId | path | string | true | ID of the Batch Prediction job
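For illustration, a minimal upload sketch using Python's `requests` library; it assumes a placeholder base URL, token, and job ID, and that the dataset is sent as `text/csv`.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
JOB_ID = "YOUR_PREDICTION_JOB_ID"              # placeholder job ID using localFile intake

# Stream the scoring dataset from disk in a single request.
with open("to_score.csv", "rb") as f:
    resp = requests.put(
        f"{API_BASE}/batchPredictions/{JOB_ID}/csvUpload/",
        headers={
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "text/csv",  # assumed MIME type; see the 406 response below
        },
        data=f,  # requests streams file objects without loading them into memory
    )
resp.raise_for_status()  # expect 202 Accepted
```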
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | Job data was successfully submitted | None
404 | Not Found | Job does not exist or does not require data | None
406 | Not Acceptable | Not acceptable MIME type | None
409 | Conflict | Dataset upload has already begun | None
422 | Unprocessable Entity | Job was "ABORTED" due to too many errors in the data | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/batchPredictions/{predictionJobId}/csvUpload/finalizeMultipart/
Finalize a multipart upload, indicating that no further chunks will be sent.
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
predictionJobId | path | string | true | ID of the Batch Prediction job
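A minimal sketch of the finalize call, again with placeholder base URL, token, and job ID.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
JOB_ID = "YOUR_PREDICTION_JOB_ID"              # placeholder multipart job ID

# Tell the job that every part has been uploaded and scoring can proceed.
resp = requests.post(
    f"{API_BASE}/batchPredictions/{JOB_ID}/csvUpload/finalizeMultipart/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
resp.raise_for_status()  # expect 202 Accepted
```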
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | Acknowledgement that the request was accepted or an error message | None
404 | Not Found | Job was deleted, never existed or you do not have access to it | None
409 | Conflict | Only multipart jobs can be finalized. | None
422 | Unprocessable Entity | No data was uploaded | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
PUT /api/v2/batchPredictions/{predictionJobId}/csvUpload/part/{partNumber}/
Stream CSV data to the prediction job in multiple parts. Only available for jobs that use the localFile intake option.
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
predictionJobId | path | string | true | ID of the Batch Prediction job
partNumber | path | integer | true | The number of the CSV part being uploaded when using multipart upload
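As a sketch only, the loop below uploads local CSV chunks part by part and then relies on the finalizeMultipart endpoint described above; the base URL, token, job ID, file names, and MIME type are placeholders, and part numbering starting at 0 is an assumption.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
JOB_ID = "YOUR_PREDICTION_JOB_ID"              # placeholder multipart job ID
PART_FILES = ["part0.csv", "part1.csv"]        # placeholder local CSV chunks

# Upload each chunk to its own part number, then finalize the upload.
for part_number, path in enumerate(PART_FILES):  # assumes numbering starts at 0
    with open(path, "rb") as f:
        resp = requests.put(
            f"{API_BASE}/batchPredictions/{JOB_ID}/csvUpload/part/{part_number}/",
            headers={
                "Authorization": f"Bearer {API_TOKEN}",
                "Content-Type": "text/csv",  # assumed MIME type
            },
            data=f,
        )
    resp.raise_for_status()  # expect 202 Accepted for every part
```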
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | Job data was successfully submitted | None
404 | Not Found | Job does not exist or does not require data | None
406 | Not Acceptable | Not acceptable MIME type | None
409 | Conflict | Dataset upload has already begun | None
422 | Unprocessable Entity | Job was "ABORTED" due to too many errors in the data | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/batchPredictions/{predictionJobId}/download/
Download the results from the job. This is only valid for jobs scored using the "localFile" output option.
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
predictionJobId | path | string | true | ID of the Batch Prediction job
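A minimal download sketch with placeholder base URL, token, job ID, and output file name; it simply streams the response body to disk.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
JOB_ID = "YOUR_PREDICTION_JOB_ID"              # placeholder job ID scored with localFile output

# Stream the scored results to a local file once the job has completed.
with requests.get(
    f"{API_BASE}/batchPredictions/{JOB_ID}/download/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    stream=True,
) as resp:
    resp.raise_for_status()
    with open("results.csv", "wb") as out:
        for chunk in resp.iter_content(chunk_size=8192):
            out.write(chunk)
```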
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
200 | OK | Job was downloaded correctly | None
404 | Not Found | Job does not exist or is not completed | None
406 | Not Acceptable | Not acceptable MIME type | None
422 | Unprocessable Entity | Job was "ABORTED" due to too many errors in the data | None
Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
200 | Content-Disposition | string | | Contains an auto generated filename for this download ("attachment;filename=result-.csv").
200 | Content-Type | string | | MIME type of the returned data
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
DELETE /api/v2/projects/{projectId}/models/{modelId}/predictionExplanationsInitialization/
Delete an existing PredictionExplanationsInitialization.
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID
modelId | path | string | true | The model ID
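A minimal deletion sketch; the base URL, token, project ID, and model ID are placeholders.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
PROJECT_ID = "YOUR_PROJECT_ID"                 # placeholder project ID
MODEL_ID = "YOUR_MODEL_ID"                     # placeholder model ID

resp = requests.delete(
    f"{API_BASE}/projects/{PROJECT_ID}/models/{MODEL_ID}/predictionExplanationsInitialization/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
resp.raise_for_status()  # expect 204 No Content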
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
204 | No Content | The deletion was successful. | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/models/{modelId}/predictionExplanationsInitialization/
Retrieve the current PredictionExplanationsInitialization.
A PredictionExplanationsInitialization is a pre-requisite for successfully computing prediction explanations using a particular model, and can be used to preview the prediction explanations that would be generated for a complete dataset.
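For illustration, a minimal retrieval sketch using placeholder base URL, token, and IDs; it prints a few rows from the sample explanations if the response contains them.

```python
import requests

API_BASE = "https://app.datarobot.com/api/v2"  # assumed base URL
API_TOKEN = "YOUR_API_TOKEN"                   # placeholder bearer token
PROJECT_ID = "YOUR_PROJECT_ID"                 # placeholder project ID
MODEL_ID = "YOUR_MODEL_ID"                     # placeholder model ID

resp = requests.get(
    f"{API_BASE}/projects/{PROJECT_ID}/models/{MODEL_ID}/predictionExplanationsInitialization/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
)
resp.raise_for_status()
init = resp.json()

# Preview a few sample explanations, if present in the response.
for row in init.get("predictionExplanationsSample", [])[:3]:
    print(row.get("prediction"), row.get("predictionExplanations"))
```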
Body parameter
{
"properties": {
"modelId": {
"description": "The model ID.",
"type": "string"
},
"predictionExplanationsSample": {
"description": "Each is a PredictionExplanationsRow. They represent a small sample of prediction explanations that could be generated for a particular dataset. They will have the same schema as the `data` array in the response from [GET /api/v2/projects/{projectId}/predictionExplanations/{predictionExplanationsId}/][get-apiv2projectsprojectidpredictionexplanationspredictionexplanationsid]. As of v2.21 only difference is that there is no forecastPoint in response for time series projects.",
"items": {
"properties": {
"adjustedPrediction": {
"description": "The exposure-adjusted output of the model for this row.",
"type": "number",
"x-versionadded": "v2.8"
},
"adjustedPredictionValues": {
"description": "The exposure-adjusted output of the model for this row.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.8"
},
"forecastDistance": {
"description": "Forecast distance for the row. For time series projects only.",
"type": "integer",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "Forecast point for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
},
"prediction": {
"description": "The output of the model for this row.",
"type": "number"
},
"predictionExplanations": {
"description": "A list of prediction explanations.",
"items": {
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21).",
"type": "string"
},
"imageExplanationUrl": {
"description": "For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"perNgramTextExplanations": {
"description": "For text features, an array of JSON object containing the per ngram based text prediction explanations.",
"items": {
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"maxItems": 10000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'.",
"type": "string"
},
"strength": {
"description": "The amount this feature's value affected the prediction.",
"type": "number"
}
},
"required": [
"feature",
"featureValue",
"imageExplanationUrl",
"label",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"type": "array"
},
"predictionThreshold": {
"description": "The threshold value used for classification prediction.",
"type": [
"number",
"null"
]
},
"predictionValues": {
"description": "A list of prediction values.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "Which row this PredictionExplanationsRow describes.",
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for the row in a multiseries project. For a single series project this will be null. For time series projects only.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"timestamp": {
"description": "Timestamp for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"adjustedPrediction",
"adjustedPredictionValues",
"forecastDistance",
"forecastPoint",
"prediction",
"predictionExplanations",
"predictionThreshold",
"predictionValues",
"rowId",
"seriesId",
"timestamp"
],
"type": "object"
},
"type": "array"
},
"projectId": {
"description": "The project ID.",
"type": "string"
}
},
"required": [
"modelId",
"predictionExplanationsSample",
"projectId"
],
"type": "object"
}
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
excludeAdjustedPredictions | query | string | false | Whether to include adjusted prediction in the PredictionExplanationsSample response.
projectId | path | string | true | The project ID
modelId | path | string | true | The model ID
body | body | PredictionExplanationsInitializationRetrieve | false | none

Enumerated Values

Parameter | Value
--- | ---
excludeAdjustedPredictions | false, False, true, True

Responses

Status | Meaning | Description | Schema
--- | --- | --- | ---
200 | OK | none | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/models/{modelId}/predictionExplanationsInitialization/
Create a new prediction explanations initialization. This is a necessary prerequisite for generating prediction explanations.
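A creation sketch with requests; the IDs are placeholders, and polling of the Location header is indicated only in outline.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id, model_id = "5f3d0cba0e1a7b0001aaaaaa", "5f3d0cba0e1a7b0001bbbbbb"  # hypothetical IDs

resp = requests.post(
    f"{API}/projects/{project_id}/models/{model_id}/predictionExplanationsInitialization/",
    headers=HEADERS,
    json={"maxExplanations": 5},  # optional; thresholdLow/thresholdHigh may also be supplied
)
if resp.status_code == 202:
    status_url = resp.headers["Location"]  # poll this URL until the async job finishes
    print("initialization queued:", status_url)
```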
Body parameter
{
"properties": {
"maxExplanations": {
"default": 3,
"description": "The maximum number of prediction explanations to supply per row of the dataset.",
"maximum": 10,
"minimum": 1,
"type": "integer"
},
"thresholdHigh": {
"default": null,
"description": "The high threshold, above which a prediction must score in order for prediction explanations to be computed. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"default": null,
"description": "The lower threshold, below which a prediction must score in order for prediction explanations to be computed for a row in the dataset. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
}
},
"type": "object"
}
Parameters
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | The request was accepted and will be worked on. | None

Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
202 | Location | string | | A url that can be polled to check the status.
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictJobs/
List all prediction jobs for a project
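For example, filtering the queue for in-progress jobs might look like the following requests sketch (placeholder host and project ID).

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID

resp = requests.get(
    f"{API}/projects/{project_id}/predictJobs/",
    headers=HEADERS,
    params={"status": "inprogress"},  # omit to get queued and in-progress (but not errored) jobs
)
for job in resp.json():  # the 200 body is an array of PredictJobDetailsResponse objects
    print(job["id"], job["status"], job["modelId"], job["isBlocked"])
```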
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
status | query | string | false | If provided, only jobs with the same status will be included in the results; otherwise, queued and inprogress jobs (but not errored jobs) will be returned.
projectId | path | string | true | The project ID.

Enumerated Values

Parameter | Value
--- | ---
status | queue, inprogress, error
Example responses
200 Response
{
"items": {
"properties": {
"id": {
"description": "the job ID of the job",
"type": "string"
},
"isBlocked": {
"description": "True if a job is waiting for its dependencies to be resolved first.",
"type": "boolean"
},
"message": {
"description": "An optional message about the job",
"type": "string"
},
"modelId": {
"description": "The ID of the model",
"type": "string"
},
"projectId": {
"description": "the project the job belongs to",
"type": "string"
},
"status": {
"description": "the status of the job",
"enum": [
"queue",
"inprogress",
"error",
"ABORTED",
"COMPLETED"
],
"type": "string"
}
},
"required": [
"id",
"isBlocked",
"message",
"modelId",
"projectId",
"status"
],
"type": "object"
},
"type": "array"
}
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
200 | OK | A list of prediction jobs for a project | Inline
404 | Not Found | Job was not found | None
Response Schema
Status Code 200
Name | Type | Required | Restrictions | Description
--- | --- | --- | --- | ---
anonymous | [PredictJobDetailsResponse] | false | | none
» id | string | true | | the job ID of the job
» isBlocked | boolean | true | | True if a job is waiting for its dependencies to be resolved first.
» message | string | true | | An optional message about the job
» modelId | string | true | | The ID of the model
» projectId | string | true | | the project the job belongs to
» status | string | true | | the status of the job

Enumerated Values

Property | Value
--- | ---
status | queue, inprogress, error, ABORTED, COMPLETED
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
DELETE /api/v2/projects/{projectId}/predictJobs/{jobId}/
Cancel a queued prediction job
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID.
jobId | path | string | true | The job ID

Responses

Status | Meaning | Description | Schema
--- | --- | --- | ---
204 | No Content | The job has been successfully cancelled | None
404 | Not Found | Job was not found or the job has already completed | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictJobs/{jobId}/
Look up a particular prediction job
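Because a finished job answers with a 303 redirect, it can help to disable redirect following, as in this hypothetical requests sketch.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id, job_id = "5f3d0cba0e1a7b0001aaaaaa", "123"  # hypothetical IDs

resp = requests.get(
    f"{API}/projects/{project_id}/predictJobs/{job_id}/",
    headers=HEADERS,
    allow_redirects=False,  # keep the 303 visible instead of silently following it
)
if resp.status_code == 200:
    print("still running:", resp.json()["status"])
elif resp.status_code == 303:
    print("finished; predictions at:", resp.headers["Location"])
```

Per the response table below, the 303 answer also includes the response JSON in addition to the Location header.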
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID.
jobId | path | string | true | The job ID
Example responses
200 Response
{
"properties": {
"id": {
"description": "the job ID of the job",
"type": "string"
},
"isBlocked": {
"description": "True if a job is waiting for its dependencies to be resolved first.",
"type": "boolean"
},
"message": {
"description": "An optional message about the job",
"type": "string"
},
"modelId": {
"description": "The ID of the model",
"type": "string"
},
"projectId": {
"description": "the project the job belongs to",
"type": "string"
},
"status": {
"description": "the status of the job",
"enum": [
"queue",
"inprogress",
"error",
"ABORTED",
"COMPLETED"
],
"type": "string"
}
},
"required": [
"id",
"isBlocked",
"message",
"modelId",
"projectId",
"status"
],
"type": "object"
}
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
200 | OK | The job has been successfully retrieved and has not yet finished. | PredictJobDetailsResponse
303 | See Other | The job has been successfully retrieved and has been completed. See Location header. The response json is also included. | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionDatasets/
List predictions datasets uploaded to a project.
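Pagination works through offset/limit plus the next link in the body; a hypothetical requests loop might look like this.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID

params = {"offset": 0, "limit": 100}
while True:
    resp = requests.get(
        f"{API}/projects/{project_id}/predictionDatasets/", headers=HEADERS, params=params
    )
    page = resp.json()
    for ds in page["data"]:
        print(ds["id"], ds["name"], ds["numRows"], ds["numColumns"])
    if page["next"] is None:  # no further pages
        break
    params["offset"] += params["limit"]
```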
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
offset | query | integer | true | This many results will be skipped.
limit | query | integer | true | At most this many results are returned. If 0, all results.
projectId | path | string | true | The project ID to query.
Example responses
200 Response
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"minimum": 0,
"type": "integer"
},
"data": {
"description": "Each has the same schema as if retrieving the dataset individually from [GET /api/v2/projects/{projectId}/predictionDatasets/{datasetId}/][get-apiv2projectsprojectidpredictiondatasetsdatasetid]",
"items": {
"properties": {
"actualValueColumn": {
"description": "Optional, only available for unsupervised projects, in case dataset was uploaded with actual value column specified. Name of the column which will be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogId": {
"description": "The ID of the AI catalog entry used to create the prediction, dataset or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogVersionId": {
"description": "The ID of the AI catalog version used to create the prediction dataset, or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"containsTargetValues": {
"description": "If True, dataset contains target values and can be used to calculate the classification metrics and insights. Only applies for supervised projects.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.21"
},
"created": {
"description": "The date string of when the dataset was created, of the format`YYYY-mm-ddTHH:MM:SS.ssssssZ`, like ``2016-06-09T11:32:34.170338Z``.",
"format": "date-time",
"type": "string"
},
"dataEndDate": {
"description": "Only available for time series projects, a date string representing the maximum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"dataQualityWarnings": {
"description": "A Json object of available warnings about potential problems in this prediction dataset. Empty if no warnings.",
"properties": {
"hasKiaMissingValuesInForecastWindow": {
"description": "If true, known-in-advance features in this dataset have missing values in the forecast window. Absence of the known-in-advance values can negatively impact prediction quality. Only applies for time series projects.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"insufficientRowsForEvaluatingModels": {
"description": "If true, the dataset has a target column present indicating it can be used to evaluate model performance but too few rows to be trustworthy in so doing. If false, either it has no target column at all or it has sufficient rows for model evaluation. Only applies for regression, binary classification, multiclass classification projects and time series unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"singleClassActualValueColumn": {
"description": "If true, actual value column has only one class and such insights as ROC curve can not be calculated. Only applies for binary classification projects or unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.21"
}
},
"type": "object"
},
"dataStartDate": {
"description": "Only available for time series projects, a date string representing the minimum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"detectedActualValueColumns": {
"description": "Only available for unsupervised projects, a list of detected `actualValueColumnInfo` objects which can be used to calculate the classification metrics and insights.",
"items": {
"properties": {
"missingCount": {
"description": "Count of the missing values in the column.",
"type": "integer",
"x-versionadded": "v2.21"
},
"name": {
"description": "Name of the column.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"missingCount",
"name"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "The date string of the forecastPoint of this prediction dataset. Only non-null for time series projects.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.8"
},
"forecastPointRange": {
"description": "Only available for time series projects, the start and end of the range of dates available for use as the forecast point, detected based on the uploaded prediction dataset.",
"items": {
"description": "Date string of a forecast point.",
"format": "date-time",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.20"
},
"id": {
"description": "The ID of this dataset.",
"type": "string"
},
"maxForecastDate": {
"description": "Only available for time series projects, a date string representing the maximum forecast date of this prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"name": {
"description": "The name of the dataset when it was uploaded.",
"type": "string"
},
"numColumns": {
"description": "The number of columns in this dataset.",
"type": "integer"
},
"numRows": {
"description": "The number of rows in this dataset.",
"type": "integer"
},
"predictionsEndDate": {
"description": "The date string of the prediction end date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"predictionsStartDate": {
"description": "The date string of the prediction start date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The project ID that owns this dataset.",
"type": "string"
},
"secondaryDatasetsConfigId": {
"description": "Only available for Feature discovery projects. Id of the secondary dataset config used by the dataset for the prediction.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"catalogId",
"catalogVersionId",
"created",
"dataQualityWarnings",
"forecastPoint",
"id",
"name",
"numColumns",
"numRows",
"predictionsEndDate",
"predictionsStartDate",
"projectId"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "A URL pointing to the next page (if `null`, there is no next page).",
"type": [
"string",
"null"
]
},
"previous": {
"description": "A URL pointing to the previous page (if `null`, there is no previous page).",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/predictionDatasets/dataSourceUploads/
Upload a dataset for predictions from a DataSource.
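A submission sketch with requests, assuming a stored credential is used for database authentication; the host and all IDs are placeholders.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID
payload = {
    "dataSourceId": "5f3d0cba0e1a7b0001cccccc",  # hypothetical DataSource ID (required)
    "credentialId": "5f3d0cba0e1a7b0001dddddd",  # hypothetical stored credential ID
}

resp = requests.post(
    f"{API}/projects/{project_id}/predictionDatasets/dataSourceUploads/",
    headers=HEADERS,
    json=payload,
)
if resp.status_code == 202:
    print("upload started; poll:", resp.headers["Location"])
```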
Body parameter
{
"properties": {
"actualValueColumn": {
"description": "The actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset.",
"type": "string",
"x-versionadded": "v2.21"
},
"credentialData": {
"description": "The credentials to authenticate with the database, to use instead of user/password or credential ID.",
"oneOf": [
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'basic' here.",
"enum": [
"basic"
],
"type": "string"
},
"password": {
"description": "The password for database authentication. The password is encrypted at rest and never saved / stored.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"credentialType",
"password",
"user"
],
"type": "object"
},
{
"properties": {
"awsAccessKeyId": {
"description": "The S3 AWS access key ID. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSecretAccessKey": {
"description": "The S3 AWS secret access key. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSessionToken": {
"default": null,
"description": "The S3 AWS session token for AWS temporary credentials.Cannot include this parameter if configId is specified.",
"type": [
"string",
"null"
]
},
"configId": {
"description": "ID of Secure configurations of credentials shared by admin.If specified, cannot include awsAccessKeyId, awsSecretAccessKey or awsSessionToken",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 's3' here.",
"enum": [
"s3"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'oauth' here.",
"enum": [
"oauth"
],
"type": "string"
},
"oauthAccessToken": {
"default": null,
"description": "The oauth access token.",
"type": [
"string",
"null"
]
},
"oauthClientId": {
"default": null,
"description": "The oauth client ID.",
"type": [
"string",
"null"
]
},
"oauthClientSecret": {
"default": null,
"description": "The oauth client secret.",
"type": [
"string",
"null"
]
},
"oauthRefreshToken": {
"description": "The oauth refresh token.",
"type": "string"
}
},
"required": [
"credentialType",
"oauthRefreshToken"
],
"type": "object"
}
],
"x-versionadded": "v2.23"
},
"credentialId": {
"description": "The credential ID to use for database authentication.",
"type": "string",
"x-versionadded": "v2.19"
},
"credentials": {
"description": "A list of credentials for the secondary datasets used in feature discovery project.",
"items": {
"oneOf": [
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
},
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
]
},
"maxItems": 30,
"type": "array",
"x-versionadded": "v2.19"
},
"dataSourceId": {
"description": "The ID of ``DataSource``.",
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored. DEPRECATED: please use ``credentialId`` or ``credentialData`` instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
},
"predictionsEndDate": {
"description": "The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "For time series projects only. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. This value is optional. If omitted or false, missing values are not allowed.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"secondaryDatasetsConfigId": {
"description": "For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
},
"useKerberos": {
"default": false,
"description": "If true, use kerberos authentication for database authentication. Default is false.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"user": {
"description": "The username for database authentication. DEPRECATED: please use ``credentialId`` or ``credentialData`` instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
}
},
"required": [
"dataSourceId"
],
"type": "object"
}
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID to which the data source will be uploaded.
body | body | PredictionDataSource | false | none

Responses

Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | Upload successfully started. See the Location header. | None

Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
202 | Location | string | | A url that can be polled to check the status.
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/predictionDatasets/datasetUploads/
Create a prediction dataset from a Dataset Asset referenced by AI Catalog item/version ID.
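A sketch of creating a prediction dataset from an AI Catalog item with requests; the catalog and project IDs are placeholders.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID

resp = requests.post(
    f"{API}/projects/{project_id}/predictionDatasets/datasetUploads/",
    headers=HEADERS,
    json={
        "datasetId": "5f3d0cba0e1a7b0001eeeeee",  # hypothetical AI Catalog dataset ID (required)
        # "datasetVersionId": "...",              # optional; latest version is used if omitted
    },
)
if resp.status_code == 202:
    print("poll:", resp.headers["Location"])
    print("new prediction dataset:", resp.json()["datasetId"])  # per the 202 response schema below
```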
Body parameter
{
"properties": {
"actualValueColumn": {
"description": "Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset.",
"type": "string",
"x-versionadded": "v2.21"
},
"credentialData": {
"description": "The credentials to authenticate with the database, to be used instead of credential ID.",
"oneOf": [
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'basic' here.",
"enum": [
"basic"
],
"type": "string"
},
"password": {
"description": "The password for database authentication. The password is encrypted at rest and never saved / stored.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"credentialType",
"password",
"user"
],
"type": "object"
},
{
"properties": {
"awsAccessKeyId": {
"description": "The S3 AWS access key ID. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSecretAccessKey": {
"description": "The S3 AWS secret access key. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSessionToken": {
"default": null,
"description": "The S3 AWS session token for AWS temporary credentials.Cannot include this parameter if configId is specified.",
"type": [
"string",
"null"
]
},
"configId": {
"description": "ID of Secure configurations of credentials shared by admin.If specified, cannot include awsAccessKeyId, awsSecretAccessKey or awsSessionToken",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 's3' here.",
"enum": [
"s3"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'oauth' here.",
"enum": [
"oauth"
],
"type": "string"
},
"oauthAccessToken": {
"default": null,
"description": "The oauth access token.",
"type": [
"string",
"null"
]
},
"oauthClientId": {
"default": null,
"description": "The oauth client ID.",
"type": [
"string",
"null"
]
},
"oauthClientSecret": {
"default": null,
"description": "The oauth client secret.",
"type": [
"string",
"null"
]
},
"oauthRefreshToken": {
"description": "The oauth refresh token.",
"type": "string"
}
},
"required": [
"credentialType",
"oauthRefreshToken"
],
"type": "object"
},
{
"properties": {
"configId": {
"description": "The ID of the saved shared credentials. If specified, cannot include user, privateKeyStr or passphrase.",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 'snowflake_key_pair_user_account' here.",
"enum": [
"snowflake_key_pair_user_account"
],
"type": "string"
},
"passphrase": {
"description": "Optional passphrase to decrypt private key. Cannot include this parameter if configId is specified.",
"type": "string"
},
"privateKeyStr": {
"description": "Private key for key pair authentication. Required if configId is not specified. Cannot include this parameter if configId is specified.",
"type": "string"
},
"user": {
"description": "Username for this credential. Required if configId is not specified. Cannot include this parameter if configId is specified.",
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"configId": {
"description": "ID of Secure configurations shared by admin.Alternative to googleConfigId (deprecated). If specified, cannot include gcpKey.",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 'gcp' here.",
"enum": [
"gcp"
],
"type": "string"
},
"gcpKey": {
"description": "The Google Cloud Platform (GCP) key. Output is the downloaded JSON resulting from creating a service account *User Managed Key* (in the *IAM & admin > Service accounts section* of GCP).Required if googleConfigId/configId is not specified.Cannot include this parameter if googleConfigId/configId is specified.",
"properties": {
"authProviderX509CertUrl": {
"description": "Auth provider X509 certificate URL.",
"format": "uri",
"type": "string"
},
"authUri": {
"description": "Auth URI.",
"format": "uri",
"type": "string"
},
"clientEmail": {
"description": "Client email address.",
"type": "string"
},
"clientId": {
"description": "Client ID.",
"type": "string"
},
"clientX509CertUrl": {
"description": "Client X509 certificate URL.",
"format": "uri",
"type": "string"
},
"privateKey": {
"description": "Private key.",
"type": "string"
},
"privateKeyId": {
"description": "Private key ID",
"type": "string"
},
"projectId": {
"description": "Project ID.",
"type": "string"
},
"tokenUri": {
"description": "Token URI.",
"format": "uri",
"type": "string"
},
"type": {
"description": "GCP account type.",
"enum": [
"service_account"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"googleConfigId": {
"description": "ID of Secure configurations shared by admin. This is deprecated.Please use configId instead. If specified, cannot include gcpKey.",
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'databricks_access_token_account' here.",
"enum": [
"databricks_access_token_account"
],
"type": "string"
},
"databricksAccessToken": {
"description": "Databricks personal access token.",
"minLength": 1,
"type": "string"
}
},
"required": [
"credentialType",
"databricksAccessToken"
],
"type": "object"
},
{
"properties": {
"azureTenantId": {
"description": "Tenant ID of the Azure AD service principal.",
"type": "string"
},
"clientId": {
"description": "Client ID of the Azure AD service principal.",
"type": "string"
},
"clientSecret": {
"description": "Client Secret of the Azure AD service principal.",
"type": "string"
},
"configId": {
"description": "ID of secure configurations of credentials shared by admin.",
"type": "string",
"x-versionadded": "v2.35"
},
"credentialType": {
"description": "The type of these credentials, 'azure_service_principal' here.",
"enum": [
"azure_service_principal"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
}
],
"x-versionadded": "v2.23"
},
"credentialId": {
"description": "The ID of the set of credentials to authenticate with the database.",
"type": "string",
"x-versionadded": "v2.19"
},
"credentials": {
"description": "List of credentials for the secondary datasets used in feature discovery project.",
"items": {
"oneOf": [
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
},
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
]
},
"maxItems": 30,
"type": "array",
"x-versionadded": "v2.19"
},
"datasetId": {
"description": "The ID of the dataset entry to use for prediction dataset.",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the dataset version to use for the prediction dataset. If not specified - uses latest version associated with datasetId.",
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.8"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.DEPRECATED: please use credentialId or credentialData instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
},
"predictionsEndDate": {
"description": "The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "For time series projects only. If True, missing values in the known in advance features are allowed in the forecast window at the prediction time. If omitted or False, missing values are not allowed.",
"type": "boolean"
},
"secondaryDatasetsConfigId": {
"description": "For feature discovery projects only. The Id of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
},
"useKerberos": {
"default": false,
"description": "If true, use kerberos authentication for database authentication. Default is false.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"user": {
"description": "The username for database authentication. DEPRECATED: please use credentialId or credentialData instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
}
},
"required": [
"datasetId"
],
"type": "object"
}
Parameters
Example responses
202 Response
{
"properties": {
"datasetId": {
"description": "The ID of the newly created prediction dataset.",
"type": "string"
}
},
"required": [
"datasetId"
],
"type": "object"
}
Responses
Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
202 | Location | string | | A url that can be polled to check the status.
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/predictionDatasets/fileUploads/
Upload a file for predictions from an attached file.
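A multipart upload sketch with requests; the file name, host, and project ID are placeholders, and optional form fields are indicated in comments.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID

with open("to_predict.csv", "rb") as fh:  # hypothetical local file
    resp = requests.post(
        f"{API}/projects/{project_id}/predictionDatasets/fileUploads/",
        headers=HEADERS,
        files={"file": ("to_predict.csv", fh, "text/csv")},
        # data={"forecastPoint": "2016-06-09T00:00:00Z"},  # time series projects only
    )
if resp.status_code == 202:
    print("upload started; poll:", resp.headers["Location"])
```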
Body parameter
{
"properties": {
"actualValueColumn": {
"description": "Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. ",
"type": "string",
"x-versionadded": "v2.21"
},
"credentials": {
"description": "A list of credentials for the secondary datasets used in feature discovery project",
"type": "string",
"x-versionadded": "v2.19"
},
"file": {
"description": "The dataset file to upload for prediction.",
"format": "binary",
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.8"
},
"predictionsEndDate": {
"description": "Used for time series projects only. The end date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for time series projects only. The start date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "A boolean flag. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. If omitted or false, missing values are not allowed. For time series projects only.",
"enum": [
"false",
"False",
"true",
"True"
],
"type": "string",
"x-versionadded": "v2.15"
},
"secondaryDatasetsConfigId": {
"description": "Optional, for feature discovery projects only. The Id of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
}
},
"required": [
"file"
],
"type": "object"
}
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID to which the data will be uploaded for prediction.
body | body | PredictionFileUpload | false | none

Responses

Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | Upload successfully started. See the Location header. | None

Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
202 | Location | string | | A url that can be polled to check the status.
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/predictionDatasets/urlUploads/
Upload a file for predictions from a URL.
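A URL-based upload, sketched with requests; the source URL, host, and project ID are placeholders.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID

resp = requests.post(
    f"{API}/projects/{project_id}/predictionDatasets/urlUploads/",
    headers=HEADERS,
    json={"url": "https://example.com/to_predict.csv"},  # hypothetical source URL (required)
)
if resp.status_code == 202:
    print("upload started; poll:", resp.headers["Location"])
```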
Body parameter
{
"properties": {
"actualValueColumn": {
"description": "Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. This value is optional.",
"type": "string",
"x-versionadded": "v2.21"
},
"credentials": {
"description": "A list of credentials for the secondary datasets used in feature discovery project",
"items": {
"oneOf": [
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
},
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
]
},
"maxItems": 30,
"type": "array",
"x-versionadded": "v2.19"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.8"
},
"predictionsEndDate": {
"description": "Used for time series projects only. The end date for bulk predictions, exclusive. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for time series projects only. The start date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "For time series projects only. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. This value is optional. If omitted or false, missing values are not allowed.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"secondaryDatasetsConfigId": {
"description": "For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
},
"url": {
"description": "The URL to download the dataset from.",
"format": "url",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID to which the data will be uploaded for prediction.
body | body | PredictionURLUpload | false | none

Responses

Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | Upload successfully started. See the Location header. | None

Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
202 | Location | string | | A url that can be polled to check the status.
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
DELETE /api/v2/projects/{projectId}/predictionDatasets/{datasetId}/
Delete a dataset that was uploaded for prediction.
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID that owns the data.
datasetId | path | string | true | The dataset ID to delete.

Responses

Status | Meaning | Description | Schema
--- | --- | --- | ---
204 | No Content | The dataset has been successfully deleted. | None
404 | Not Found | No dataset with the specified datasetId found. | None
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionDatasets/{datasetId}/
Get the metadata of a specific dataset. This only works for datasets uploaded to an existing project for prediction.
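A metadata lookup sketch with requests (placeholder host and IDs); the fields printed come from the response schema shown below.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id, dataset_id = "5f3d0cba0e1a7b0001aaaaaa", "5f3d0cba0e1a7b0001ffffff"  # hypothetical IDs

resp = requests.get(
    f"{API}/projects/{project_id}/predictionDatasets/{dataset_id}/", headers=HEADERS
)
resp.raise_for_status()
meta = resp.json()
print(meta["name"], meta["numRows"], meta["numColumns"])
print("warnings:", meta["dataQualityWarnings"])
```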
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
projectId | path | string | true | The project ID that owns the data.
datasetId | path | string | true | The dataset ID to query for.
Example responses
200 Response
{
"properties": {
"actualValueColumn": {
"description": "Optional, only available for unsupervised projects, in case dataset was uploaded with actual value column specified. Name of the column which will be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogId": {
"description": "The ID of the AI catalog entry used to create the prediction, dataset or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogVersionId": {
"description": "The ID of the AI catalog version used to create the prediction dataset, or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"containsTargetValues": {
"description": "If True, dataset contains target values and can be used to calculate the classification metrics and insights. Only applies for supervised projects.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.21"
},
"created": {
"description": "The date string of when the dataset was created, of the format`YYYY-mm-ddTHH:MM:SS.ssssssZ`, like ``2016-06-09T11:32:34.170338Z``.",
"format": "date-time",
"type": "string"
},
"dataEndDate": {
"description": "Only available for time series projects, a date string representing the maximum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"dataQualityWarnings": {
"description": "A Json object of available warnings about potential problems in this prediction dataset. Empty if no warnings.",
"properties": {
"hasKiaMissingValuesInForecastWindow": {
"description": "If true, known-in-advance features in this dataset have missing values in the forecast window. Absence of the known-in-advance values can negatively impact prediction quality. Only applies for time series projects.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"insufficientRowsForEvaluatingModels": {
"description": "If true, the dataset has a target column present indicating it can be used to evaluate model performance but too few rows to be trustworthy in so doing. If false, either it has no target column at all or it has sufficient rows for model evaluation. Only applies for regression, binary classification, multiclass classification projects and time series unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"singleClassActualValueColumn": {
"description": "If true, actual value column has only one class and such insights as ROC curve can not be calculated. Only applies for binary classification projects or unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.21"
}
},
"type": "object"
},
"dataStartDate": {
"description": "Only available for time series projects, a date string representing the minimum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"detectedActualValueColumns": {
"description": "Only available for unsupervised projects, a list of detected `actualValueColumnInfo` objects which can be used to calculate the classification metrics and insights.",
"items": {
"properties": {
"missingCount": {
"description": "Count of the missing values in the column.",
"type": "integer",
"x-versionadded": "v2.21"
},
"name": {
"description": "Name of the column.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"missingCount",
"name"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "The date string of the forecastPoint of this prediction dataset. Only non-null for time series projects.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.8"
},
"forecastPointRange": {
"description": "Only available for time series projects, the start and end of the range of dates available for use as the forecast point, detected based on the uploaded prediction dataset.",
"items": {
"description": "Date string of a forecast point.",
"format": "date-time",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.20"
},
"id": {
"description": "The ID of this dataset.",
"type": "string"
},
"maxForecastDate": {
"description": "Only available for time series projects, a date string representing the maximum forecast date of this prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"name": {
"description": "The name of the dataset when it was uploaded.",
"type": "string"
},
"numColumns": {
"description": "The number of columns in this dataset.",
"type": "integer"
},
"numRows": {
"description": "The number of rows in this dataset.",
"type": "integer"
},
"predictionsEndDate": {
"description": "The date string of the prediction end date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"predictionsStartDate": {
"description": "The date string of the prediction start date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The project ID that owns this dataset.",
"type": "string"
},
"secondaryDatasetsConfigId": {
"description": "Only available for Feature discovery projects. Id of the secondary dataset config used by the dataset for the prediction.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"catalogId",
"catalogVersionId",
"created",
"dataQualityWarnings",
"forecastPoint",
"id",
"name",
"numColumns",
"numRows",
"predictionsEndDate",
"predictionsStartDate",
"projectId"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/predictionExplanations/
Create a new PredictionExplanations object (and its accompanying PredictionExplanationsRecord).
In order to successfully create PredictionExplanations for a particular model and dataset, you must first
- Compute feature impact for the model via POST /api/v2/projects/{projectId}/models/{modelId}/featureImpact/
- Compute a PredictionExplanationsInitialization for the model via POST /api/v2/projects/{projectId}/models/{modelId}/predictionExplanationsInitialization/
- Compute predictions for the model and dataset via POST /api/v2/projects/{projectId}/predictions/
thresholdHigh and thresholdLow are optional filters applied to speed up computation. When at least one is specified, only the selected outlier rows will have prediction explanations computed. Rows are considered to be outliers if their predicted value (in the case of regression projects) or probability of being the positive class (in the case of classification projects) is less than thresholdLow or greater than thresholdHigh. If neither is specified, prediction explanations will be computed for all rows.
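Assuming the prerequisites above are complete, a creation request might look like this requests sketch (host and IDs are hypothetical).

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"  # hypothetical project ID

resp = requests.post(
    f"{API}/projects/{project_id}/predictionExplanations/",
    headers=HEADERS,
    json={
        "modelId": "5f3d0cba0e1a7b0001bbbbbb",    # hypothetical model ID (required)
        "datasetId": "5f3d0cba0e1a7b0001ffffff",  # hypothetical prediction dataset ID (required)
        "maxExplanations": 5,
        "thresholdLow": 0.1,   # optional outlier filters; omit both to explain every row
        "thresholdHigh": 0.9,
    },
)
if resp.status_code == 202:
    print("queued; poll:", resp.headers["Location"])
```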
Body parameter
{
"properties": {
"classNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with numTopClasses. If neither specified - we assume numTopClasses=1.",
"items": {
"type": "string"
},
"maxItems": 10,
"type": "array",
"x-versionadded": "v2.29"
},
"datasetId": {
"description": "The dataset ID.",
"type": "string"
},
"maxExplanations": {
"default": 3,
"description": "The maximum number of prediction explanations to supply per row of the dataset.",
"maximum": 10,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "The model ID.",
"type": "string"
},
"numTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with classNames. If neither specified - we assume numTopClasses=1.",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"thresholdHigh": {
"default": null,
"description": "The high threshold, above which a prediction must score in order for prediction explanations to be computed. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"default": null,
"description": "The lower threshold, below which a prediction must score in order for prediction explanations to be computed for a row in the dataset. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
}
},
"required": [
"datasetId",
"modelId"
],
"type": "object"
}
Parameters
Responses
Status | Meaning | Description | Schema
--- | --- | --- | ---
202 | Accepted | The request was accepted and will be worked on. | None

Status | Header | Type | Format | Description
--- | --- | --- | --- | ---
202 | Location | string | | A url that can be polled to check the status.
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionExplanations/{predictionExplanationsId}/
Retrieve stored Prediction Explanations.
Each PredictionExplanationsRow retrieved corresponds to a row of the prediction dataset, although some rows may not have had prediction explanations computed depending on the thresholds selected.
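A paging sketch with requests for reading the stored rows; the host, IDs, and page size are placeholders.

```python
import os
import requests

API = "https://app.datarobot.com/api/v2"  # assumed endpoint root
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}
project_id = "5f3d0cba0e1a7b0001aaaaaa"        # hypothetical project ID
explanations_id = "5f3d0cba0e1a7b0001e1e1e1"   # hypothetical PredictionExplanationsRecord ID

resp = requests.get(
    f"{API}/projects/{project_id}/predictionExplanations/{explanations_id}/",
    headers=HEADERS,
    params={"offset": 0, "limit": 100, "excludeAdjustedPredictions": "true"},
)
resp.raise_for_status()
for row in resp.json()["data"]:
    top = [(e["feature"], e["strength"]) for e in row["predictionExplanations"]]
    print(row["rowId"], row["prediction"], top)
```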
Parameters
Name | In | Type | Required | Description
--- | --- | --- | --- | ---
offset | query | integer | false | This many results will be skipped.
limit | query | integer | true | At most this many results are returned. The default may change and a new maximum limit may be imposed without notice.
excludeAdjustedPredictions | query | string | false | Whether to include adjusted prediction in PredictionExplanationsRow response.
projectId | path | string | true | The project ID
predictionExplanationsId | path | string | true | The ID of the PredictionExplanationsRecord to retrieve.

Enumerated Values

Parameter | Value
--- | ---
excludeAdjustedPredictions | false, False, true, True
Example responses
200 Response
{
"properties": {
"adjustmentMethod": {
"description": "'exposureNormalized' (for regression projects with exposure) or 'N/A' (for classification projects) The value of 'exposureNormalized' indicates that prediction outputs are adjusted (or divided) by exposure. The value of 'N/A' indicates that no adjustments are applied to the adjusted predictions and they are identical to the unadjusted predictions.",
"type": "string",
"x-versionadded": "v2.8"
},
"count": {
"description": "How many rows of prediction explanations were returned.",
"type": "integer"
},
"data": {
"description": "Each is a PredictionExplanationsRow corresponding to one row of the prediction dataset.",
"items": {
"properties": {
"adjustedPrediction": {
"description": "The exposure-adjusted output of the model for this row.",
"type": "number",
"x-versionadded": "v2.8"
},
"adjustedPredictionValues": {
"description": "The exposure-adjusted output of the model for this row.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.8"
},
"forecastDistance": {
"description": "Forecast distance for the row. For time series projects only.",
"type": "integer",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "Forecast point for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
},
"prediction": {
"description": "The output of the model for this row.",
"type": "number"
},
"predictionExplanations": {
"description": "A list of prediction explanations.",
"items": {
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21).",
"type": "string"
},
"imageExplanationUrl": {
"description": "For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"perNgramTextExplanations": {
"description": "For text features, an array of JSON object containing the per ngram based text prediction explanations.",
"items": {
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"maxItems": 10000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'.",
"type": "string"
},
"strength": {
"description": "The amount this feature's value affected the prediction.",
"type": "number"
}
},
"required": [
"feature",
"featureValue",
"imageExplanationUrl",
"label",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"type": "array"
},
"predictionThreshold": {
"description": "The threshold value used for classification prediction.",
"type": [
"number",
"null"
]
},
"predictionValues": {
"description": "A list of prediction values.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "Which row this PredictionExplanationsRow describes.",
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for the row in a multiseries project. For a single series project this will be null. For time series projects only.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"timestamp": {
"description": "Timestamp for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"adjustedPrediction",
"adjustedPredictionValues",
"forecastDistance",
"forecastPoint",
"prediction",
"predictionExplanations",
"predictionThreshold",
"predictionValues",
"rowId",
"seriesId",
"timestamp"
],
"type": "object"
},
"type": "array"
},
"id": {
"description": "The ID of this group of prediction explanations.",
"type": "string"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"predictionExplanationsRecordLocation": {
"description": "The URL of the PredictionExplanationsRecord associated with these prediction explanations.",
"type": "string"
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"adjustmentMethod",
"count",
"data",
"id",
"next",
"predictionExplanationsRecordLocation",
"previous"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionExplanationsRecords/
List PredictionExplanationsRecord objects for a project.
These contain metadata about the computed prediction explanations and the location at which the PredictionExplanations can be retrieved.
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
false |
This many results will be skipped. |
limit |
query |
integer |
false |
At most this many results are returned. |
modelId |
query |
string |
false |
If specified, only prediction explanations records computed for this model will be returned. |
projectId |
path |
string |
true |
The project ID |
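As an illustrative sketch (placeholders throughout, not an official client), the snippet below lists the records for one model and prints where each set of explanations can be retrieved via the `predictionExplanationsLocation` field described in the schema below.

```python
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}

resp = requests.get(
    f"{BASE_URL}/projects/PROJECT_ID/predictionExplanationsRecords/",
    headers=HEADERS,
    params={"modelId": "MODEL_ID", "limit": 20},   # placeholder model ID
)
resp.raise_for_status()

for record in resp.json()["data"]:
    # Each record points at the location of the actual explanations.
    print(record["id"], record["datasetId"], record["predictionExplanationsLocation"])
```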
Example responses
200 Response
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"minimum": 0,
"type": "integer"
},
"data": {
"description": "Each has the same schema as if retrieving the prediction explanations individually from [GET /api/v2/projects/{projectId}/predictionExplanationsRecords/{predictionExplanationsId}/][get-apiv2projectsprojectidpredictionexplanationsrecordspredictionexplanationsid].",
"items": {
"properties": {
"datasetId": {
"description": "The dataset ID.",
"type": "string"
},
"finishTime": {
"description": "Timestamp referencing when computation for these prediction explanations finished.",
"type": "number"
},
"id": {
"description": "The PredictionExplanationsRecord ID.",
"type": "string"
},
"maxExplanations": {
"description": "The maximum number of codes generated per prediction.",
"type": "integer"
},
"modelId": {
"description": "The model ID.",
"type": "string"
},
"numColumns": {
"description": "The number of columns prediction explanations were computed for.",
"type": "integer"
},
"predictionExplanationsLocation": {
"description": "Where to retrieve the prediction explanations.",
"type": "string"
},
"predictionThreshold": {
"description": "The threshold value used for binary classification prediction.",
"type": [
"number",
"null"
]
},
"projectId": {
"description": "The project ID.",
"type": "string"
},
"thresholdHigh": {
"description": "The prediction explanation high threshold. Predictions must be above this value (or below the thresholdLow value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"description": "The prediction explanation low threshold. Predictions must be below this value (or above the thresholdHigh value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
}
},
"required": [
"datasetId",
"finishTime",
"id",
"maxExplanations",
"modelId",
"numColumns",
"predictionExplanationsLocation",
"predictionThreshold",
"projectId",
"thresholdHigh",
"thresholdLow"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "A URL pointing to the next page (if `null`, there is no next page).",
"type": [
"string",
"null"
]
},
"previous": {
"description": "A URL pointing to the previous page (if `null`, there is no previous page).",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
DELETE /api/v2/projects/{projectId}/predictionExplanationsRecords/{predictionExplanationsId}/
Delete saved Prediction Explanations.
Deletes both the actual prediction explanations and the corresponding PredictionExplanationsRecord.
Parameters
Name |
In |
Type |
Required |
Description |
projectId |
path |
string |
true |
The project ID |
predictionExplanationsId |
path |
string |
true |
The ID of the PredictionExplanationsRecord to retrieve. |
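A minimal deletion sketch, assuming the same placeholder host, token, and IDs as the examples above; a successful call returns 204 No Content as documented in the Responses table below.

```python
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}

resp = requests.delete(
    f"{BASE_URL}/projects/PROJECT_ID/predictionExplanationsRecords/EXPLANATIONS_ID/",
    headers=HEADERS,
)
assert resp.status_code == 204   # both the record and the stored explanations are removed
```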
Responses
Status |
Meaning |
Description |
Schema |
204 |
No Content |
The object was deleted successfully. |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionExplanationsRecords/{predictionExplanationsId}/
Retrieve a PredictionExplanationsRecord object.
A PredictionExplanationsRecord contains metadata about the computed prediction explanations and the location at which the PredictionExplanations can be retrieved.
Body parameter
{
"properties": {
"datasetId": {
"description": "The dataset ID.",
"type": "string"
},
"finishTime": {
"description": "Timestamp referencing when computation for these prediction explanations finished.",
"type": "number"
},
"id": {
"description": "The PredictionExplanationsRecord ID.",
"type": "string"
},
"maxExplanations": {
"description": "The maximum number of codes generated per prediction.",
"type": "integer"
},
"modelId": {
"description": "The model ID.",
"type": "string"
},
"numColumns": {
"description": "The number of columns prediction explanations were computed for.",
"type": "integer"
},
"predictionExplanationsLocation": {
"description": "Where to retrieve the prediction explanations.",
"type": "string"
},
"predictionThreshold": {
"description": "The threshold value used for binary classification prediction.",
"type": [
"number",
"null"
]
},
"projectId": {
"description": "The project ID.",
"type": "string"
},
"thresholdHigh": {
"description": "The prediction explanation high threshold. Predictions must be above this value (or below the thresholdLow value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"description": "The prediction explanation low threshold. Predictions must be below this value (or above the thresholdHigh value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
}
},
"required": [
"datasetId",
"finishTime",
"id",
"maxExplanations",
"modelId",
"numColumns",
"predictionExplanationsLocation",
"predictionThreshold",
"projectId",
"thresholdHigh",
"thresholdLow"
],
"type": "object"
}
Parameters
Name |
In |
Type |
Required |
Description |
projectId |
path |
string |
true |
The project ID |
predictionExplanationsId |
path |
string |
true |
The ID of the PredictionExplanationsRecord to retrieve. |
body |
body |
PredictionExplanationsRecord |
false |
none |
Responses
Status |
Meaning |
Description |
Schema |
200 |
OK |
The object was found and returned successfully. |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictions/
Get a list of prediction records.
.. deprecated:: v2.21
Use GET /api/v2/projects/{projectId}/predictionsMetadata/ instead. The only difference is that the parameter datasetId is renamed to predictionDatasetId in both the request and the response.
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
true |
This many results will be skipped |
limit |
query |
integer |
true |
At most this many results are returned. To specify no limit, use 0. The default may change and a maximum limit may be imposed without notice. |
datasetId |
query |
string |
false |
Dataset id used to create the predictions |
modelId |
query |
string |
false |
Model id |
projectId |
path |
string |
true |
The project of the predictions. |
Example responses
200 Response
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of the metadata records.",
"items": {
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Actual value column can be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "Deprecated alias for `predictionDatasetId`.",
"type": [
"string",
"null"
]
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is `shap`, which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used in during feature derivation.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions were generated.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"id": {
"description": "The id of the prediction record.",
"type": "string"
},
"includesPredictionIntervals": {
"description": "Whether the predictions include prediction intervals.",
"type": "boolean"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates `no limit`. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model id used for predictions.",
"type": "string"
},
"predictionDatasetId": {
"description": "The dataset id where the prediction data comes from. The field is available via `/api/v2/projects/<projectId>/predictionsMetadata/` route and replaced on `datasetId`in deprecated `/api/v2/projects/<projectId>/predictions/` endpoint.",
"type": [
"string",
"null"
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. If prediction intervals were computed, what percentile they represent. Will be ``None`` if ``includePredictionIntervals`` is ``False``.",
"type": [
"integer",
"null"
]
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "For time series projects only. The end date for bulk predictions, exclusive. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "For time series projects only. The start date for bulk predictions. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"projectId": {
"description": "The project id of the predictions.",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number",
"x-versionadded": "v2.21"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer",
"x-versionadded": "v2.21"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"url": {
"description": "The url at which you can download the predictions.",
"type": "string"
}
},
"required": [
"id",
"includesPredictionIntervals",
"modelId",
"predictionIntervalsSize",
"projectId",
"url"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/predictions/
There are two ways to make predictions. The recommended way is to first upload your dataset to the project and then predict against that dataset using the corresponding datasetId. To follow that pattern, send the JSON request body.
Note that requesting prediction intervals will automatically trigger backtesting if backtests have not already been completed for this model.
The legacy, deprecated method is to send the file directly with the prediction request. Files 10MB or larger must use the uploaded-dataset workflow above. For smaller files, the following multipart/form-data fields can be used:
:form file: a dataset to make predictions on
:form modelId: the model to use to make predictions
.. note:: If the legacy method of uploading data to this endpoint is used, a new dataset is created behind the scenes. For performance reasons, it is much better to create the dataset first and use the supported JSON workflow; the legacy method remains only to preserve existing workflows.
Body parameter
{
"properties": {
"actualValueColumn": {
"description": "For time series projects only. Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. This value is optional.",
"type": "string",
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "The dataset to compute predictions for - must have previously been uploaded.",
"type": "string"
},
"explanationAlgorithm": {
"description": "If set to `shap`, the response will include prediction explanations based on the SHAP explainer (SHapley Additive exPlanations). Defaults to null (no prediction explanations).",
"enum": [
"shap"
],
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"includeFdwCounts": {
"default": false,
"description": "For time series projects with partial history only. Indicates if feature derivation window counts `featureDerivationWindowCounts` will be part of the response.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"includePredictionIntervals": {
"description": "Specifies whether prediction intervals should be calculated for this request. Defaults to True if `predictionIntervalsSize` is specified, otherwise defaults to False.",
"type": "boolean",
"x-versionadded": "v2.16"
},
"maxExplanations": {
"description": "Specifies the maximum number of explanation values that should be returned for each row, ordered by absolute value, greatest to least. In the case of 'shap': If not set, explanations are returned for all features. If the number of features is greater than the 'maxExplanations', the sum of remaining values will also be returned as 'shapRemainingTotal'. Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns. Cannot be set if 'explanationAlgorithm' is omitted.",
"maximum": 100,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model to make predictions on.",
"type": "string"
},
"predictionIntervalsSize": {
"description": "Represents the percentile to use for the size of the prediction intervals. Defaults to 80 if `includePredictionIntervals` is True.",
"maximum": 100,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.16"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions. Accepts values from 0.0 to 1.0. If not specified, model default prediction threshold will be used.",
"maximum": 1,
"minimum": 0,
"type": "number",
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
}
},
"required": [
"datasetId",
"modelId"
],
"type": "object"
}
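The sketch below illustrates the recommended JSON workflow described above: predict against an already-uploaded dataset, then poll the job referenced by the Location header. The host, token, and IDs are placeholders, and the polling loop is only a sketch; the exact job payload and completion behavior are not documented in this section.

```python
import time
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}

payload = {
    "datasetId": "DATASET_ID",        # dataset already uploaded to the project (placeholder)
    "modelId": "MODEL_ID",            # placeholder
    "explanationAlgorithm": "shap",   # optional: include SHAP prediction explanations
    "maxExplanations": 10,
}

resp = requests.post(
    f"{BASE_URL}/projects/PROJECT_ID/predictions/",
    json=payload,
    headers=HEADERS,
)
resp.raise_for_status()                 # expect 202 Accepted
job_url = resp.headers["Location"]      # async prediction job to poll

result_url = None
for _ in range(60):                     # poll for up to ~5 minutes
    job = requests.get(job_url, headers=HEADERS, allow_redirects=False)
    if job.status_code == 303:          # assumption: the job redirects to the result when done
        result_url = job.headers["Location"]
        break
    time.sleep(5)

print("Predictions available at:", result_url)
```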
Parameters
Name |
In |
Type |
Required |
Description |
projectId |
path |
string |
true |
The project to make predictions within. |
Content-Type |
header |
string |
true |
Content types available for making the request. multipart/form-data is the legacy, deprecated method for sending a small file with the prediction request. |
body |
body |
CreatePredictionFromDataset |
false |
none |
Enumerated Values
Parameter |
Value |
Content-Type |
[application/json , multipart/form-data ] |
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Prediction has successfully been requested. See Location header. |
None |
422 |
Unprocessable Entity |
The request cannot be processed. |
None |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictions/{predictionId}/
Retrieve predictions that have previously been computed.
Predictions are encoded either as JSON or CSV.
If CSV output was requested, the returned CSV data will contain the following columns:
- For regression projects: row_id and prediction.
- For binary classification projects: row_id, prediction, class_<positive_class_label>, and class_<negative_class_label>.
- For multiclass projects: row_id, prediction, and a class_<class_label> for each class.
- For multilabel projects: row_id and, for each class, prediction_<class_label> and class_<class_label>.
- For time series projects, these additional columns will be added: forecast_point, forecast_distance, timestamp, and series_id.
.. minversion:: v2.21
* If `explanationAlgorithm` = 'shap', these additional columns will be added: triplets of (`Explanation_<i>_feature_name`, `Explanation_<i>_feature_value`, and `Explanation_<i>_strength`) for `i` ranging from 1 to `maxExplanations`, plus `shap_remaining_total` and `shap_base_value`. Binary classification projects will also have `explained_class`, the class for which positive SHAP values imply an increased probability.
Parameters
Name |
In |
Type |
Required |
Description |
predictionId |
path |
string |
true |
The id of the prediction record to retrieve. If you have the jobId, you can retrieve the predictionId using GET /api/v2/projects/{projectId}/predictJobs/{jobId}/. |
projectId |
path |
string |
true |
The id of the project the prediction belongs to. |
Accept |
header |
string |
false |
Requested MIME type for the returned data |
Enumerated Values
Parameter |
Value |
Accept |
[application/json , text/csv ] |
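A minimal download sketch, assuming the same placeholder host, token, and IDs as above: the Accept header requests the CSV layout whose columns are described earlier in this endpoint's documentation.

```python
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {
    "Authorization": "Bearer YOUR_API_TOKEN",
    "Accept": "text/csv",        # ask for the CSV layout described above
}

resp = requests.get(
    f"{BASE_URL}/projects/PROJECT_ID/predictions/PREDICTION_ID/",
    headers=HEADERS,
)
resp.raise_for_status()

with open("predictions.csv", "w", encoding="utf-8") as f:
    f.write(resp.text)           # columns depend on project type (row_id, prediction, ...)
```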
Example responses
200 Response
{
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Will be present only if the prediction dataset has an actual value column. The name of the column with actuals that was used to calculate the scores and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is 'shap', which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used during feature derivation in feature derivation window.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"includesPredictionIntervals": {
"description": "For time series projects only. Indicates if prediction intervals will be part of the response. Defaults to False.",
"type": "boolean",
"x-versionadded": "v2.16"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates 'no limit'. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"positiveClass": {
"description": "For binary classification, the class of the target deemed the positive class. For all other project types this field will be null.",
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
}
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. Will be present only if `includePredictionIntervals` is True. Indicates the percentile used for prediction intervals calculation. Defaults to 80.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.16"
},
"predictions": {
"description": "The json array of predictions. The predictions in the response will have slightly different formats, depending on the project type.",
"items": {
"properties": {
"actualValue": {
"description": "In the case of an unsupervised time series project with a dataset using ``predictionsStartDate`` and ``predictionsEndDate`` for bulk predictions and a specified actual value column, the predictions will be a json array in the same format as with a forecast point with one additional element - `actualValues`. It is the actual value in the row.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"forecastDistance": {
"description": "(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column.",
"type": [
"integer",
"null"
]
},
"forecastPoint": {
"description": "(if time series project) The forecastPoint of the predictions. Either provided or inferred.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"originalFormatTimestamp": {
"description": "The timestamp of this row in the prediction dataset. Unlike the ``timestamp`` field, this field will keep the same DateTime formatting as the uploaded prediction dataset. (This column is shown if enabled by your administrator.)",
"type": "string",
"x-versionadded": "v2.17"
},
"positiveProbability": {
"description": "For binary classification, the probability the row belongs to the positive class.",
"minimum": 0,
"type": [
"number",
"null"
]
},
"prediction": {
"description": "The prediction of the model.",
"oneOf": [
{
"description": "If using a regressor model, will be the numeric value of the target.",
"type": "number"
},
{
"description": "If using a binary or muliclass classifier model, will be the predicted class.",
"type": "string"
},
{
"description": "If using a multilabel classifier model, will be a list of predicted classes.",
"items": {
"type": "string"
},
"type": "array"
}
]
},
"predictionExplanationMetadata": {
"description": "Array containing algorithm-specific values. Varies depending on the value of `explanationAlgorithm`.",
"items": {
"description": "Prediction explanation metadata.",
"properties": {
"shapRemainingTotal": {
"description": "Will be present only if `explanationAlgorithm` = 'shap' and `maxExplanations` is nonzero. The total of SHAP values for features beyond the `maxExplanations`. This can be identically 0 in all rows, if `maxExplanations` is greater than the number of features and thus all features are returned.",
"type": "integer"
}
},
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionExplanations": {
"description": "Array contains `predictionExplanation` objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if `explanationAlgorithm` is not null (prediction explanations were requested).",
"items": {
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionIntervalLowerBound": {
"description": "Present if ``includePredictionIntervals`` is True. Indicates a lower bound of the estimate of error based on test data.",
"type": "number",
"x-versionadded": "v2.16"
},
"predictionIntervalUpperBound": {
"description": "Present if ``includePredictionIntervals`` is True. Indicates an upper bound of the estimate of error based on test data.",
"type": "number",
"x-versionadded": "v2.16"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionValues": {
"description": "A list of predicted values for this row.",
"items": {
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "The row in the prediction dataset this prediction corresponds to.",
"minimum": 0,
"type": "integer"
},
"segmentId": {
"description": "The ID of the segment value for a segmented project.",
"type": "string",
"x-versionadded": "v2.27"
},
"seriesId": {
"description": "The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN.",
"type": [
"string",
"null"
]
},
"target": {
"description": "In the case of a time series project with a dataset using predictionsStartDate and predictionsEndDate for bulk predictions, the predictions will be a json array in the same format as with a forecast point with one additional element - `target`. It is the target value in the row.",
"type": [
"string",
"null"
]
},
"timestamp": {
"description": "(if time series project) The timestamp of this row in the prediction dataset.",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"prediction",
"rowId"
],
"type": "object"
},
"type": "array"
},
"shapBaseValue": {
"description": "Will be present only if `explanationAlgorithm` = 'shap'. The model's average prediction over the training data. SHAP values are deviations from the base value.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.21"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"items": {
"description": "Mismatch information",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"task": {
"description": "The prediction task.",
"enum": [
"Regression",
"Binary",
"Multiclass",
"Multilabel"
],
"type": "string"
}
},
"required": [
"positiveClass",
"predictions",
"task"
],
"type": "object"
}
Responses
Status |
Header |
Type |
Format |
Description |
200 |
Content-Type |
string |
|
MIME type of the returned data |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionsMetadata/
Use the ID of a metadata object to get the complete set of predictions.
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
true |
This many results will be skipped |
limit |
query |
integer |
true |
At most this many results are returned. To specify no limit, use 0. The default may change and a maximum limit may be imposed without notice. |
predictionDatasetId |
query |
string |
false |
Dataset id used to create the predictions |
modelId |
query |
string |
false |
Model id |
projectId |
path |
string |
true |
The project of the predictions. |
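A listing sketch, assuming the predictionsMetadata route named in the deprecation note above and the same placeholder host, token, and IDs; note the filter parameter is predictionDatasetId rather than the deprecated datasetId.

```python
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}

resp = requests.get(
    f"{BASE_URL}/projects/PROJECT_ID/predictionsMetadata/",
    headers=HEADERS,
    params={"offset": 0, "limit": 100, "predictionDatasetId": "DATASET_ID"},  # placeholders
)
resp.raise_for_status()

for record in resp.json()["data"]:
    # "url" points at the downloadable predictions for each metadata record.
    print(record["id"], record["modelId"], record["url"])
```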
Example responses
200 Response
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of the metadata records.",
"items": {
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Actual value column can be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "Deprecated alias for `predictionDatasetId`.",
"type": [
"string",
"null"
]
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is `shap`, which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used in during feature derivation.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions were generated.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"id": {
"description": "The id of the prediction record.",
"type": "string"
},
"includesPredictionIntervals": {
"description": "Whether the predictions include prediction intervals.",
"type": "boolean"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates `no limit`. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model id used for predictions.",
"type": "string"
},
"predictionDatasetId": {
"description": "The dataset id where the prediction data comes from. The field is available via `/api/v2/projects/<projectId>/predictionsMetadata/` route and replaced on `datasetId`in deprecated `/api/v2/projects/<projectId>/predictions/` endpoint.",
"type": [
"string",
"null"
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. If prediction intervals were computed, what percentile they represent. Will be ``None`` if ``includePredictionIntervals`` is ``False``.",
"type": [
"integer",
"null"
]
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "For time series projects only. The end date for bulk predictions, exclusive. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "For time series projects only. The start date for bulk predictions. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"projectId": {
"description": "The project id of the predictions.",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number",
"x-versionadded": "v2.21"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer",
"x-versionadded": "v2.21"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"url": {
"description": "The url at which you can download the predictions.",
"type": "string"
}
},
"required": [
"id",
"includesPredictionIntervals",
"modelId",
"predictionIntervalsSize",
"projectId",
"url"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/predictionsMetadata/{predictionId}/
Use the ID of a metadata object to get the complete set of predictions.
Parameters
Name |
In |
Type |
Required |
Description |
predictionId |
path |
string |
true |
The id of the prediction record to retrieve. If you have the jobId, you can retrieve the predictionId using GET /api/v2/projects/{projectId}/predictJobs/{jobId}/. |
projectId |
path |
string |
true |
The id of the project the prediction belongs to. |
Example responses
200 Response
{
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Actual value column can be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "Deprecated alias for `predictionDatasetId`.",
"type": [
"string",
"null"
]
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is `shap`, which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used in during feature derivation.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions were generated.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"id": {
"description": "The id of the prediction record.",
"type": "string"
},
"includesPredictionIntervals": {
"description": "Whether the predictions include prediction intervals.",
"type": "boolean"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates `no limit`. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model id used for predictions.",
"type": "string"
},
"predictionDatasetId": {
"description": "The dataset id where the prediction data comes from. The field is available via `/api/v2/projects/<projectId>/predictionsMetadata/` route and replaced on `datasetId`in deprecated `/api/v2/projects/<projectId>/predictions/` endpoint.",
"type": [
"string",
"null"
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. If prediction intervals were computed, what percentile they represent. Will be ``None`` if ``includePredictionIntervals`` is ``False``.",
"type": [
"integer",
"null"
]
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "For time series projects only. The end date for bulk predictions, exclusive. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "For time series projects only. The start date for bulk predictions. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"projectId": {
"description": "The project id of the predictions.",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number",
"x-versionadded": "v2.21"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer",
"x-versionadded": "v2.21"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"url": {
"description": "The url at which you can download the predictions.",
"type": "string"
}
},
"required": [
"id",
"includesPredictionIntervals",
"modelId",
"predictionIntervalsSize",
"projectId",
"url"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/trainingPredictions/
Get a list of training prediction records
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
true |
This many results will be skipped |
limit |
query |
integer |
true |
At most this many results are returned |
projectId |
path |
string |
true |
Project ID to retrieve training predictions for |
Example responses
200 Response
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "A list of training prediction jobs",
"items": {
"description": "A training prediction job",
"properties": {
"dataSubset": {
"description": "Subset of data predicted on",
"enum": [
"all",
"validationAndHoldout",
"holdout",
"allBacktests",
"validation",
"crossValidation"
],
"type": "string",
"x-enum-versionadded": [
{
"value": "validation",
"x-versionadded": "v2.21"
}
]
},
"explanationAlgorithm": {
"description": "The method used for calculating prediction explanations",
"enum": [
"shap"
],
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"id": {
"description": "ID of the training prediction job",
"type": "string"
},
"maxExplanations": {
"description": "the number of top contributors that are included in prediction explanations. Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns",
"maximum": 100,
"minimum": 0,
"type": [
"integer",
"null"
]
},
"modelId": {
"description": "ID of the model",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if \"explanationAlgorithm\" was set to \"shap\" and there were additivity failures during SHAP values calculation",
"items": {
"description": "A training prediction job",
"properties": {
"partitionName": {
"description": "The partition used for the prediction record.",
"type": "string"
},
"value": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
},
"required": [
"partitionName",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"url": {
"description": "The location of these predictions",
"format": "uri",
"type": "string"
}
},
"required": [
"dataSubset",
"id",
"modelId",
"url"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"data",
"next",
"previous"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
POST /api/v2/projects/{projectId}/trainingPredictions/
Create training data predictions
Body parameter
{
"properties": {
"dataSubset": {
"default": "all",
"description": "Subset of data predicted on: The value \"all\" returns predictions for all rows in the dataset including data used for training, validation, holdout and any rows discarded. This is not available for large datasets or projects created with Date/Time partitioning. The value \"validationAndHoldout\" returns predictions for the rows used to calculate the validation score and the holdout score. Not available for large projects or Date/Time projects for models trained into the validation set. The value \"holdout\" returns predictions for the rows used to calculate the holdout score. Not available for projects created without a holdout or for models trained into holdout for large datasets or created with Date/Time partitioning. The value \"allBacktests\" returns predictions for the rows used to calculate the backtesting scores for Date/Time projects. The value \"validation\" returns predictions for the rows used to calculate the validation score.",
"enum": [
"all",
"validationAndHoldout",
"holdout",
"allBacktests",
"validation",
"crossValidation"
],
"type": "string",
"x-enum-versionadded": [
{
"value": "validation",
"x-versionadded": "v2.21"
}
]
},
"explanationAlgorithm": {
"description": "If set to \"shap\", the response will include prediction explanations based on the SHAP explainer (SHapley Additive exPlanations). Defaults to null (no prediction explanations)",
"type": "string",
"x-versionadded": "v2.21"
},
"maxExplanations": {
"description": "Specifies the maximum number of explanation values that should be returned for each row, ordered by absolute value, greatest to least. In the case of \"shap\": If not set, explanations are returned for all features. If the number of features is greater than the \"maxExplanations\", the sum of remaining values will also be returned as \"shapRemainingTotal\". Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns. Cannot be set if \"explanationAlgorithm\" is omitted.",
"maximum": 100,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model to make predictions on",
"type": "string"
}
},
"required": [
"dataSubset",
"modelId"
],
"type": "object"
}
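The sketch below submits a training predictions job using the request body documented above (placeholders throughout, not an official client); as with the other asynchronous endpoints, the 202 response carries a Location header that can be polled.

```python
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}

payload = {
    "modelId": "MODEL_ID",            # placeholder
    "dataSubset": "holdout",          # see the dataSubset choices documented above
    "explanationAlgorithm": "shap",   # optional SHAP explanations
    "maxExplanations": 5,
}

resp = requests.post(
    f"{BASE_URL}/projects/PROJECT_ID/trainingPredictions/",
    json=payload,
    headers=HEADERS,
)
resp.raise_for_status()                     # expect 202 Accepted
print("Job status URL:", resp.headers["Location"])
```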
Parameters
Name |
In |
Type |
Required |
Description |
projectId |
path |
string |
true |
Project ID to compute training predictions for |
body |
body |
CreateTrainingPrediction |
false |
none |
Responses
Status |
Meaning |
Description |
Schema |
202 |
Accepted |
Submitted successfully. See Location header. |
None |
422 |
Unprocessable Entity |
Possible causes: the model, time series project, or blender does not support SHAP-based prediction explanations; the request failed validation (StackedPredictionRequestValidationError); or a training predictions job with the same parameters was already submitted. |
None |
Status |
Header |
Type |
Format |
Description |
202 |
Location |
string |
|
URL for tracking async job status. |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
GET /api/v2/projects/{projectId}/trainingPredictions/{predictionId}/
Retrieve training predictions that have previously been computed
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
true |
This many results will be skipped |
limit |
query |
integer |
true |
At most this many results are returned |
projectId |
path |
string |
true |
Project ID to retrieve training predictions for |
predictionId |
path |
string |
true |
Prediction ID to retrieve training predictions for |
Accept |
header |
string |
false |
Requested MIME type for the returned data |
Enumerated Values
Parameter |
Value |
Accept |
[application/json , text/csv ] |
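A retrieval sketch that pages through the training prediction rows as JSON by following the `next` URL in each response (placeholder host, token, and IDs; for CSV output, set the Accept header to text/csv instead).

```python
import requests

BASE_URL = "https://app.example.com/api/v2"           # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN", "Accept": "application/json"}

url = f"{BASE_URL}/projects/PROJECT_ID/trainingPredictions/PREDICTION_ID/"
params = {"offset": 0, "limit": 1000}

rows = []
while url is not None:
    resp = requests.get(url, headers=HEADERS, params=params)
    resp.raise_for_status()
    page = resp.json()
    rows.extend(page["data"])    # each row includes partitionId, rowId, and prediction
    url = page["next"]
    params = None                # the "next" URL already embeds its paging parameters

print(f"Retrieved {len(rows)} training prediction rows")
```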
Example responses
200 Response
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "A list of training prediction rows",
"items": {
"description": "A training prediction row",
"properties": {
"forecastDistance": {
"description": "(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column.",
"type": [
"integer",
"null"
]
},
"forecastPoint": {
"description": "(if time series project) The forecastPoint of the predictions. Either provided or inferred.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"partitionId": {
"description": "The partition used for the prediction record",
"type": "string"
},
"prediction": {
"description": "The prediction of the model.",
"oneOf": [
{
"description": "If using a regressor model, will be the numeric value of the target.",
"type": "number"
},
{
"description": "If using a binary or muliclass classifier model, will be the predicted class.",
"type": "string"
},
{
"description": "If using a multilabel classifier model, will be a list of predicted classes.",
"items": {
"type": "string"
},
"type": "array"
}
]
},
"predictionExplanations": {
"description": "Array contains `predictionExplanation` objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if `explanationAlgorithm` is not null (prediction explanations were requested).",
"items": {
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionValues": {
"description": "A list of predicted values for this row.",
"items": {
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "The row in the prediction dataset this prediction corresponds to.",
"minimum": 0,
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN.",
"type": [
"string",
"null"
]
},
"shapMetadata": {
"description": "The additional information necessary to understand shap based prediction explanations. Only present if explanationAlgorithm=\"shap\" was added in compute request.",
"properties": {
"shapBaseValue": {
"description": "The model's average prediction over the training data. SHAP values are deviations from the base value.",
"type": "number"
},
"shapRemainingTotal": {
"description": "The total of SHAP values for features beyond the maxExplanations. This can be identically 0 in all rows, if maxExplanations is greater than the number of features and thus all features are returned.",
"type": "integer"
},
"warnings": {
"description": "SHAP values calculation warnings",
"items": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"type": "array"
}
},
"required": [
"shapBaseValue",
"shapRemainingTotal",
"warnings"
],
"type": "object"
},
"timestamp": {
"description": "(if time series project) The timestamp of this row in the prediction dataset.",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"partitionId",
"prediction",
"rowId"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"data",
"next",
"previous"
],
"type": "object"
}
Responses
Status |
Meaning |
Description |
Schema |
200 |
OK |
Training predictions encoded either as JSON or CSV |
string |
404 |
Not Found |
Job does not exist or is not completed |
None |
Status |
Header |
Type |
Format |
Description |
200 |
Content-Type |
string |
|
MIME type of the returned data |
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
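As a minimal sketch of consuming this response, the snippet below fetches a completed job's results and branches on the returned Content-Type header; the host, API token, and results URL are placeholders and are not defined by this reference.
import requests

# Placeholders only -- substitute your own host, API token, and the results URL
# of the endpoint documented above.
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}
results_url = "https://app.example.com/api/v2/.../"

resp = requests.get(results_url, headers=HEADERS)
if resp.status_code == 404:
    raise RuntimeError("Job does not exist or is not completed")

# The Content-Type response header indicates whether the body is JSON or CSV.
if resp.headers.get("Content-Type", "").startswith("application/json"):
    for row in resp.json()["data"]:
        print(row["rowId"], row["prediction"])
else:
    csv_text = resp.text  # CSV-encoded predictions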
GET /api/v2/scheduledJobs/
Get a list of scheduled batch prediction jobs a user can view
Parameters
Name |
In |
Type |
Required |
Description |
offset |
query |
integer |
true |
The number of scheduled jobs to skip. Defaults to 0. |
limit |
query |
integer |
true |
The number of scheduled jobs (max 100) to return. Defaults to 20. |
orderBy |
query |
string |
false |
The order to sort the scheduled jobs. Defaults to order by last successful run timestamp in descending order. |
search |
query |
string |
false |
Case-insensitive search against the scheduled job name or type name. |
deploymentId |
query |
string |
false |
Filter by the prediction integration deployment ID. Ignored for non-prediction-integration type IDs. |
typeId |
query |
string |
false |
Filter by scheduled job type ID. |
queryByUser |
query |
string |
false |
Which user field to filter with. |
filterEnabled |
query |
string |
false |
Filter jobs using the enabled field. If true , only enabled jobs are returned, otherwise if false , only disabled jobs are returned. The default returns both enabled and disabled jobs. |
Enumerated Values
Parameter |
Value |
typeId |
datasetRefresh |
queryByUser |
[createdBy , updatedBy ] |
filterEnabled |
[false , False , true , True ] |
Example responses
200 Response
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "List of scheduled jobs",
"items": {
"properties": {
"createdBy": {
"description": "User name of the creator",
"type": [
"string",
"null"
]
},
"deploymentId": {
"description": "ID of the deployment this scheduled job is created from.",
"type": [
"string",
"null"
]
},
"enabled": {
"description": "True if the job is enabled and false if the job is disabled.",
"type": "boolean"
},
"id": {
"description": "ID of scheduled prediction job",
"type": "string"
},
"name": {
"description": "Name of the scheduled job.",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"scheduledJobId": {
"description": "ID of this scheduled job.",
"type": "string"
},
"status": {
"description": "Object containing status information about the scheduled job.",
"properties": {
"lastFailedRun": {
"description": "Date and time of the last failed run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRun": {
"description": "Date and time of the last successful run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"nextRunTime": {
"description": "Date and time of the next run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"queuePosition": {
"description": "Position of the job in the queue Job. The value will show 0 if the job is about to run, otherwise, the number will be greater than 0 if currently queued, or None if the job is not currently running.",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"running": {
"description": "`true` or `false` depending on whether the job is currently running.",
"type": "boolean"
}
},
"required": [
"running"
],
"type": "object"
},
"typeId": {
"description": "Job type of the scheduled job",
"type": "string"
},
"updatedAt": {
"description": "Time of last modification",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"enabled",
"id",
"schedule",
"scheduledJobId",
"status",
"typeId"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
},
"updatedAt": {
"description": "Time of last modification",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "User ID of last modifier",
"type": "string"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object"
}
Responses
To perform this operation, you must be authenticated by means of one of the following methods:
BearerAuth
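A rough sketch of calling this endpoint with the query parameters above; the host and token are placeholders, and the example schedule in the comment only illustrates how dayOfMonth and dayOfWeek combine as described in the schema.
import requests

API = "https://app.example.com/api/v2"                # placeholder host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}  # placeholder token

# List the first 20 enabled scheduled jobs created by the caller.
params = {"offset": 0, "limit": 20, "filterEnabled": "true", "queryByUser": "createdBy"}
resp = requests.get(f"{API}/scheduledJobs/", headers=HEADERS, params=params)
resp.raise_for_status()

for job in resp.json()["data"]:
    # A schedule such as {"minute": [0], "hour": [2], "dayOfWeek": ["tue"],
    # "dayOfMonth": ["*"], "month": ["*"]} runs every Tuesday at 02:00,
    # because dayOfMonth ["*"] defers to the days matched by dayOfWeek.
    print(job["scheduledJobId"], job["status"].get("nextRunTime"), job["schedule"])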
Schemas
ActualValueColumnInfo
{
"properties": {
"missingCount": {
"description": "Count of the missing values in the column.",
"type": "integer",
"x-versionadded": "v2.21"
},
"name": {
"description": "Name of the column.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"missingCount",
"name"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
missingCount |
integer |
true |
|
Count of the missing values in the column. |
name |
string |
true |
|
Name of the column. |
AzureDataStreamer
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from Azure
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
Use the specified credential to access the url |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
format |
string |
false |
|
Type of input file format |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
format |
[csv , parquet ] |
type |
azure |
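When this object is read back from a job, credentialId may be returned as the literal string [redacted] rather than an ID if the viewer lacks permission. A minimal sketch of handling that case, using an entirely illustrative intake block:
# Illustrative intake block as it might be echoed back in a job's configuration;
# the URL is a placeholder.
intake = {
    "type": "azure",
    "url": "https://myaccount.blob.core.windows.net/scoring/input.csv",
    "format": "csv",
    "credentialId": "[redacted]",  # real ID hidden by permission settings
}

credential_id = intake.get("credentialId")
if credential_id == "[redacted]":
    credential_id = None  # the credential exists but cannot be viewed by this user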
AzureIntake
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from Azure
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string,null |
false |
|
Use the specified credential to access the url |
format |
string |
false |
|
Type of input file format |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
format |
[csv , parquet ] |
type |
azure |
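A hedged example of what an AzureIntake block could look like when submitting a job; the URL and credential ID are invented placeholders.
# Hypothetical intakeSettings payload following the AzureIntake schema above.
intake_settings = {
    "type": "azure",                                                         # required
    "url": "https://myaccount.blob.core.windows.net/scoring/input.parquet",  # required
    "format": "parquet",                                                     # defaults to "csv"
    "credentialId": "5e4bc5b35e6e763beb9db14a",                              # optional; may be null
}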
AzureOutput
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Save CSV data chunks to Azure Blob Storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string,null |
false |
|
Use the specified credential to access the url |
format |
string |
false |
|
Type of output file format |
partitionColumns |
[string] |
false |
maxItems: 100
|
For Parquet directory-scoring only. The column names of the intake data by which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output URL ends with a slash "/"). |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the file or directory |
Enumerated Values
Property |
Value |
format |
[csv , parquet ] |
type |
azure |
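The sketch below shows one plausible AzureOutput block for Parquet directory-scoring; the trailing slash marks a directory, so partitionColumns must be supplied. The column names, URL, and credential ID are assumptions.
# Hypothetical outputSettings payload following the AzureOutput schema above.
output_settings = {
    "type": "azure",
    "url": "https://myaccount.blob.core.windows.net/results/scored/",  # ends with "/": directory-scoring
    "format": "parquet",
    "partitionColumns": ["region", "scoring_date"],  # illustrative intake column names
    "credentialId": "5e4bc5b35e6e763beb9db14a",      # placeholder
}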
AzureOutputAdaptor
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Save CSV data chunks to Azure Blob Storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
Use the specified credential to access the url |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
format |
string |
false |
|
Type of output file format |
partitionColumns |
[string] |
false |
maxItems: 100
|
For Parquet directory-scoring only. The column names of the intake data by which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output URL ends with a slash "/"). |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the file or directory |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
format |
[csv , parquet ] |
type |
azure |
AzureServicePrincipalCredentials
{
"properties": {
"azureTenantId": {
"description": "Tenant ID of the Azure AD service principal.",
"type": "string"
},
"clientId": {
"description": "Client ID of the Azure AD service principal.",
"type": "string"
},
"clientSecret": {
"description": "Client Secret of the Azure AD service principal.",
"type": "string"
},
"configId": {
"description": "ID of secure configurations of credentials shared by admin.",
"type": "string",
"x-versionadded": "v2.35"
},
"credentialType": {
"description": "The type of these credentials, 'azure_service_principal' here.",
"enum": [
"azure_service_principal"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
azureTenantId |
string |
false |
|
Tenant ID of the Azure AD service principal. |
clientId |
string |
false |
|
Client ID of the Azure AD service principal. |
clientSecret |
string |
false |
|
Client Secret of the Azure AD service principal. |
configId |
string |
false |
|
ID of secure configurations of credentials shared by admin. |
credentialType |
string |
true |
|
The type of these credentials, 'azure_service_principal' here. |
Enumerated Values
Property |
Value |
credentialType |
azure_service_principal |
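As an illustration only, a credential payload of this type could be assembled as below; every ID and the secret are placeholders.
# Hypothetical azure_service_principal credential payload.
credentials = {
    "credentialType": "azure_service_principal",  # required
    "azureTenantId": "00000000-0000-0000-0000-000000000000",
    "clientId": "11111111-1111-1111-1111-111111111111",
    "clientSecret": "YOUR_CLIENT_SECRET",
    # Or reference an admin-shared secure configuration instead:
    # "configId": "5e4bc5b35e6e763beb9db14a",
}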
BasicCredentials
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'basic' here.",
"enum": [
"basic"
],
"type": "string"
},
"password": {
"description": "The password for database authentication. The password is encrypted at rest and never saved / stored.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"credentialType",
"password",
"user"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialType |
string |
true |
|
The type of these credentials, 'basic' here. |
password |
string |
true |
|
The password for database authentication. The password is encrypted at rest and never saved / stored. |
user |
string |
true |
|
The username for database authentication. |
Enumerated Values
Property |
Value |
credentialType |
basic |
BatchJobCSVSettings
{
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
}
The CSV settings used for this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
delimiter |
any |
true |
|
CSV fields are delimited by this character. Use the string "tab" to denote TSV (TAB separated values). |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
maxLength: 1 minLength: 1
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
encoding |
string |
true |
|
The encoding to be used for intake and output. For example (but not limited to): "shift_jis", "latin_1" or "mskanji". |
quotechar |
string |
true |
maxLength: 1 minLength: 1
|
Fields containing the delimiter or newlines must be quoted using this character. |
Enumerated Values
Property |
Value |
anonymous |
tab |
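For example, a job reading and writing TAB-separated, Shift JIS encoded files might use a csvSettings block like the sketch below (values are illustrative).
# Illustrative csvSettings block following the schema above.
csv_settings = {
    "delimiter": "tab",       # the literal string "tab" selects TSV
    "encoding": "shift_jis",  # applies to both intake and output
    "quotechar": '"',         # single character used to quote fields
}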
BatchJobCreatedBy
{
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
Who created this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
fullName |
string,null |
true |
|
The full name of the user who created this job (if defined by the user) |
userId |
string |
true |
|
The User ID of the user who created this job |
username |
string |
true |
|
The username (e-mail address) of the user who created this job |
BatchJobDefinitionResponse
{
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
}
The Batch Prediction Job Definition linking to this job, if any.
Properties
Name |
Type |
Required |
Restrictions |
Description |
createdBy |
string |
true |
|
The ID of creator of this job definition |
id |
string |
true |
|
The ID of the Batch Prediction job definition |
name |
string |
true |
|
A human-readable name for the definition, must be unique across organisations |
BatchJobLinks
{
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object",
"x-versionadded": "v2.35"
}
Links useful for this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
csvUpload |
string(url) |
false |
|
The URL used to upload the dataset for this job. Only available for localFile intake. |
download |
string,null |
false |
|
The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available. |
self |
string(url) |
true |
|
The URL used to access this job. |
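A sketch of how these links might be used for a localFile job: upload the dataset to csvUpload, then poll self until download is populated. The upload method (PUT), the links key in the job response, and all URLs and tokens are assumptions, not guaranteed by this schema.
import time
import requests

HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}  # placeholder token

# "links" stands in for a BatchJobLinks object returned for a localFile job.
links = {
    "self": "https://app.example.com/api/v2/batchJobs/JOB_ID/",
    "csvUpload": "https://app.example.com/api/v2/batchJobs/JOB_ID/csvUpload/",
    "download": None,
}

# Push the scoring data to the csvUpload link (localFile intake only); PUT is assumed here.
with open("to_score.csv", "rb") as f:
    requests.put(links["csvUpload"], data=f,
                 headers={**HEADERS, "Content-Type": "text/csv"})

# Poll until the download link appears (localFile output only).
for _ in range(120):  # poll for up to ~10 minutes
    job = requests.get(links["self"], headers=HEADERS).json()
    links = job.get("links", links)
    if links.get("download"):
        break
    time.sleep(5)

result = requests.get(links["download"], headers=HEADERS)
open("scored.csv", "wb").write(result.content)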
BatchJobListResponse
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of jobs",
"items": {
"properties": {
"batchMonitoringJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"id": {
"description": "The ID of the Batch job",
"type": "string",
"x-versionadded": "v2.30"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.30"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.30"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object",
"x-versionadded": "v2.35"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object",
"x-versionadded": "v2.35"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.30"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.30"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.30"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.30"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.30"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object",
"x-versionadded": "v2.35"
},
"maxItems": 10000,
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object",
"x-versionadded": "v2.35"
}
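For orientation, here is a minimal sketch of how the listing endpoint above might be called with a few of its query parameters (status, orderBy, offset/limit) and how the fields shown in the example response can be read. The host URL, environment-variable names, and Bearer authorization header format are assumptions for illustration only; the property-by-property breakdown follows below.

```python
import os

import requests

# Assumed values: the endpoint host and API token come from your own environment.
API_HOST = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
API_TOKEN = os.environ["DATAROBOT_API_TOKEN"]

# List the most recent completed and failed batch jobs, newest first.
resp = requests.get(
    f"{API_HOST}/batchJobs/",
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    params={
        "offset": 0,
        "limit": 50,
        "status": ["COMPLETED", "FAILED"],  # repeated parameter filters on multiple statuses
        "orderBy": "-created",
    },
)
resp.raise_for_status()
page = resp.json()

print(f"Showing {page['count']} of {page['totalCount']} jobs")
for job in page["data"]:
    print(job["id"], job["status"], job["percentageCompleted"])
```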
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| count | integer | false | | Number of items returned on this page. |
| data | [BatchJobResponse] | true | maxItems: 10000 | An array of jobs |
| next | string,null(uri) | true | | URL pointing to the next page (if null, there is no next page). |
| previous | string,null(uri) | true | | URL pointing to the previous page (if null, there is no previous page). |
| totalCount | integer | true | | The total number of items across all pages. |
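Because the listing is paginated, a client typically follows the `next` URL until it is null. A minimal sketch, assuming the same placeholder host and token as above:

```python
import os

import requests

API_HOST = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}


def iter_batch_jobs(params=None):
    """Yield every job across all pages by following `next` until it is null."""
    url = f"{API_HOST}/batchJobs/"
    params = {"offset": 0, "limit": 100, **(params or {})}
    while url is not None:
        resp = requests.get(url, headers=HEADERS, params=params)
        resp.raise_for_status()
        page = resp.json()
        yield from page["data"]
        url = page["next"]  # absolute URL of the next page, or None on the last page
        params = None       # the `next` URL already carries its own paging parameters


# Example: collect the IDs of failed jobs that scored at least one bad row.
failed = [j["id"] for j in iter_batch_jobs({"status": "FAILED"}) if j["failedRows"] > 0]
```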
BatchJobPredictionInstance
{
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
}
Override the default prediction instance from the deployment when scoring this job.
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| apiKey | string | false | | By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users. |
| datarobotKey | string | false | | If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key. |
| hostName | string | true | | Override the default host name of the deployment with this. |
| sslEnabled | boolean | true | | Use SSL (HTTPS) when communicating with the overridden prediction server. |
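As a minimal, hypothetical illustration of how these fields fit together inside a job's `jobSpec`, the fragment below overrides the prediction instance; every identifier, key, and host name is a placeholder, not a value from this reference.

```python
# Hypothetical values for illustration only.
prediction_instance = {
    "hostName": "my-dedicated-instance.example.com",  # required: overrides the deployment's default host
    "sslEnabled": True,                               # required: talk to the instance over HTTPS
    "datarobotKey": "0000-aaaa-bbbb-cccc",            # only needed for Managed AI Cloud instances
    # "apiKey" is omitted, so the job runs with the key of the user who created it.
}

job_spec_fragment = {
    "deploymentId": "65f0c0ffee0000000000beef",  # placeholder deployment ID
    "predictionInstance": prediction_instance,
}
```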
BatchJobRemapping
{
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| inputName | string | true | | Rename column with this name |
| outputName | string,null | true | | Rename column to this name (leave as null to remove from the output) |
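For illustration, the sketch below shows a remapping as it might appear in a job's `columnNamesRemapping` list: one column is renamed and another is removed by setting `outputName` to null. The column names are hypothetical.

```python
# Hypothetical column names for illustration only.
column_names_remapping = [
    # Rename the raw prediction column to something friendlier in the output.
    {"inputName": "readmitted_1_PREDICTION", "outputName": "readmission_score"},
    # Setting outputName to None (null in JSON) drops the column from the output entirely.
    {"inputName": "readmitted_0_PREDICTION", "outputName": None},
]
```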
BatchJobResponse
{
"properties": {
"batchMonitoringJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object",
"x-versionadded": "v2.35"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.30"
},
"id": {
"description": "The ID of the Batch job",
"type": "string",
"x-versionadded": "v2.30"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.30"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.30"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object",
"x-versionadded": "v2.35"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object",
"x-versionadded": "v2.35"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.30"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.30"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.30"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.30"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.30"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object",
"x-versionadded": "v2.35"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
batchMonitoringJobDefinition |
BatchJobDefinitionResponse |
false |
|
The Batch Prediction Job Definition linking to this job, if any. |
batchPredictionJobDefinition |
BatchJobDefinitionResponse |
false |
|
The Batch Prediction Job Definition linking to this job, if any. |
created |
string(date-time) |
true |
|
When was this job created |
createdBy |
BatchJobCreatedBy |
true |
|
Who created this job |
elapsedTimeSec |
integer |
true |
minimum: 0
|
Number of seconds the job has been processing for |
failedRows |
integer |
true |
minimum: 0
|
Number of rows that have failed scoring |
hidden |
string(date-time) |
false |
|
When was this job last hidden, blank if visible |
id |
string |
true |
|
The ID of the Batch job |
intakeDatasetDisplayName |
string,null |
false |
|
If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset. |
jobIntakeSize |
integer,null |
true |
minimum: 0
|
Number of bytes in the intake dataset for this job |
jobOutputSize |
integer,null |
true |
minimum: 0
|
Number of bytes in the output dataset for this job |
jobSpec |
BatchJobSpecResponse |
true |
|
The job configuration used to create this job |
links |
BatchJobLinks |
true |
|
Links useful for this job |
logs |
[string] |
true |
|
The job log. |
monitoringBatchId |
string,null |
true |
|
Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled. |
percentageCompleted |
number |
true |
maximum: 100 minimum: 0
|
Indicates job progress, based on the number of already processed rows in the dataset |
queuePosition |
integer,null |
false |
minimum: 0
|
To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job starts running. May not be available in all environments. |
queued |
boolean |
true |
|
The job has been put on the queue for execution. |
resultsDeleted |
boolean |
false |
|
Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage) |
scoredRows |
integer |
true |
minimum: 0
|
Number of rows that have been used in prediction computation |
skippedRows |
integer |
true |
minimum: 0
|
Number of rows that have been skipped during scoring. May contain a non-zero value only for time series predictions, if the provided dataset contains more historical rows than required. |
source |
string |
false |
|
Source from which batch job was started |
status |
string |
true |
|
The current job status |
statusDetails |
string |
true |
|
Explanation for current status |
Enumerated Values
Property |
Value |
status |
[INITIALIZING , RUNNING , COMPLETED , ABORTED , FAILED ] |
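As a practical aside, the fields documented above (status, percentageCompleted and links.download) are enough to poll a job until it finishes. Below is a minimal sketch in Python using the requests library; the job URL, the output file name and the Token authorization scheme are placeholder assumptions to be replaced with values from your own environment, not values prescribed by the API.

import time
import requests

# Placeholder assumptions: the job's links.self URL and an API token.
JOB_URL = "https://app.example.com/api/v2/batchJobs/JOB_ID/"
HEADERS = {"Authorization": "Token YOUR_API_TOKEN"}

# Terminal statuses taken from the status enum documented above.
TERMINAL_STATUSES = {"COMPLETED", "ABORTED", "FAILED"}

while True:
    job = requests.get(JOB_URL, headers=HEADERS).json()
    print(f"{job['status']}: {job['percentageCompleted']:.0f}% completed")
    if job["status"] in TERMINAL_STATUSES:
        break
    time.sleep(10)

# links.download is only populated for localFile outputs and stays null
# until the results are ready for download.
download_url = job["links"].get("download")
if job["status"] == "COMPLETED" and download_url:
    with requests.get(download_url, headers=HEADERS, stream=True) as resp:
        resp.raise_for_status()
        with open("predictions.csv", "wb") as out:
            for chunk in resp.iter_content(chunk_size=1 << 20):
                out.write(chunk)

Note that links.download applies only to localFile outputs; other output types deliver results directly to the configured destination.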
BatchJobSpecResponse
{
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.30"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object",
"x-versionadded": "v2.35"
}
The job configuration used to create this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
abortOnError |
boolean |
true |
|
Should this job abort if too many errors are encountered |
batchJobType |
string |
false |
|
Batch job type. |
chunkSize |
any |
false |
|
Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
maximum: 41943040 minimum: 20
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
columnNamesRemapping |
any |
false |
|
Remap (rename or remove columns from) the output from this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
object |
false |
|
Provide a dictionary with key/value pairs to remap (deprecated) |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[BatchJobRemapping] |
false |
maxItems: 1000
|
Provide a list of items to remap |
continued
Name |
Type |
Required |
Restrictions |
Description |
csvSettings |
BatchJobCSVSettings |
true |
|
The CSV settings used for this job |
deploymentId |
string |
false |
|
ID of deployment which is used in job for processing predictions dataset |
disableRowLevelErrorHandling |
boolean |
true |
|
Skip row by row error handling |
explanationAlgorithm |
string |
false |
|
Which algorithm will be used to calculate prediction explanations |
explanationClassNames |
[string] |
false |
maxItems: 10 minItems: 1
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither is specified, explanationNumTopClasses=1 is assumed |
explanationNumTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither is specified, explanationNumTopClasses=1 is assumed |
includePredictionStatus |
boolean |
true |
|
Include prediction status column in the output |
includeProbabilities |
boolean |
true |
|
Include probabilities for all classes |
includeProbabilitiesClasses |
[string] |
true |
maxItems: 100
|
Include only probabilities for these specific class names. |
intakeSettings |
any |
true |
|
The intake option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureDataStreamer |
false |
|
Stream CSV data chunks from Azure |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DataStageDataStreamer |
false |
|
Stream CSV data chunks from data stage storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
CatalogDataStreamer |
false |
|
Stream CSV data chunks from AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPDataStreamer |
false |
|
Stream CSV data chunks from Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryDataStreamer |
false |
|
Stream CSV data chunks from Big Query using GCS |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3DataStreamer |
false |
|
Stream CSV data chunks from Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeDataStreamer |
false |
|
Stream CSV data chunks from Snowflake |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseDataStreamer |
false |
|
Stream CSV data chunks from Azure Synapse |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DSSDataStreamer |
false |
|
Stream CSV data chunks from DSS dataset |
xor
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPDataStreamer |
false |
|
Stream CSV data chunks from HTTP |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCDataStreamer |
false |
|
Stream CSV data chunks from JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileDataStreamer |
false |
|
Stream CSV data chunks from local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereDataStreamer |
false |
|
Stream CSV data chunks from Datasphere using browser-datasphere |
continued
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
true |
maximum: 100 minimum: 0
|
Number of explanations requested. Will be ordered by strength. |
maxNgramExplanations |
any |
false |
|
The maximum number of text ngram explanations to supply per row of the dataset. The default recommended maxNgramExplanations is all (no limit) |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
minimum: 0
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
modelId |
string |
false |
|
ID of leaderboard model which is used in job for processing predictions dataset |
modelPackageId |
string |
false |
|
ID of model package from registry which is used in job for processing predictions dataset |
monitoringAggregation |
MonitoringAggregation |
false |
|
Defines the aggregation policy for monitoring jobs. |
monitoringBatchPrefix |
string,null |
false |
|
Name of the batch to create with this job |
monitoringColumns |
MonitoringColumnsMapping |
false |
|
Column names mapping for monitoring |
monitoringOutputSettings |
MonitoringOutputSettings |
false |
|
Output settings for monitoring jobs |
numConcurrent |
integer |
false |
minimum: 1
|
Number of simultaneous requests to run against the prediction instance |
outputSettings |
any |
false |
|
The response option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureOutputAdaptor |
false |
|
Save CSV data chunks to Azure Blob Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPOutputAdaptor |
false |
|
Save CSV data chunks to Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryOutputAdaptor |
false |
|
Save CSV data chunks to Google BigQuery in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3OutputAdaptor |
false |
|
Saves CSV data chunks to Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeOutputAdaptor |
false |
|
Save CSV data chunks to Snowflake in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseOutputAdaptor |
false |
|
Save CSV data chunks to Azure Synapse in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HttpOutputAdaptor |
false |
|
Save CSV data chunks to HTTP data endpoint |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JdbcOutputAdaptor |
false |
|
Save CSV data chunks via JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileOutputAdaptor |
false |
|
Save CSV data chunks to local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereOutputAdatpor |
false |
|
Saves CSV data chunks to Datasphere using browser-datasphere |
continued
Name |
Type |
Required |
Restrictions |
Description |
passthroughColumns |
[string] |
false |
maxItems: 100
|
Pass through columns from the original dataset |
passthroughColumnsSet |
string |
false |
|
Pass through all columns from the original dataset |
pinnedModelId |
string |
false |
|
Specify a model ID used for scoring |
predictionInstance |
BatchJobPredictionInstance |
false |
|
Override the default prediction instance from the deployment when scoring this job. |
predictionWarningEnabled |
boolean,null |
false |
|
Enable prediction warnings. |
redactedFields |
[string] |
true |
|
A list of qualified field names from intakeSettings and/or outputSettings that were redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId |
skipDriftTracking |
boolean |
true |
|
Skip drift tracking for this job. |
thresholdHigh |
number |
false |
|
Compute explanations for predictions above this threshold |
thresholdLow |
number |
false |
|
Compute explanations for predictions below this threshold |
timeseriesSettings |
any |
false |
|
Time Series settings, included if this job is a Time Series job. |
oneOf
xor
Enumerated Values
Property |
Value |
batchJobType |
[monitoring , prediction ] |
anonymous |
[auto , fixed , dynamic ] |
explanationAlgorithm |
[shap , xemp ] |
anonymous |
all |
passthroughColumnsSet |
all |
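To make the intake and output alternatives above concrete, the sketch below shows one plausible combination: streaming scoring data from S3 and writing results back over JDBC. The identifiers and the URL are placeholders, only fields required by the respective schemas plus a few common options are shown, and anything omitted keeps its documented default.
{
  "intakeSettings": {
    "type": "s3",
    "url": "s3://example-bucket/scoring/input.csv",
    "credentialId": "<credential id>"
  },
  "outputSettings": {
    "type": "jdbc",
    "dataStoreId": "<data store id>",
    "credentialId": "<credential id>",
    "schema": "public",
    "table": "scoring_results",
    "statementType": "insert",
    "createTableIfNotExists": true
  }
}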
BatchJobTimeSeriesSettingsForecast
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
forecastPoint |
string(date-time) |
false |
|
Used for forecast predictions in order to override the inferred forecast point from the dataset. |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed. |
type |
string |
true |
|
Forecast mode makes predictions using forecastPoint or rows in the dataset without target. |
Enumerated Values
Property |
Value |
type |
forecast |
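As a minimal sketch, a forecast-mode timeseriesSettings object could look like the following; the forecast point is an arbitrary example timestamp, and relaxKnownInAdvanceFeaturesCheck is shown at its default. Omitting forecastPoint lets it be inferred from the dataset.
{
  "type": "forecast",
  "forecastPoint": "2008-08-24T12:00:00Z",
  "relaxKnownInAdvanceFeaturesCheck": false
}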
BatchJobTimeSeriesSettingsHistorical
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
predictionsEndDate |
string(date-time) |
false |
|
Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset. |
predictionsStartDate |
string(date-time) |
false |
|
Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset. |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed. |
type |
string |
true |
|
Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range. |
Enumerated Values
Property |
Value |
type |
historical |
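A historical-mode timeseriesSettings object might look like the sketch below; the date range is illustrative, and leaving out predictionsStartDate and predictionsEndDate lets both be inferred automatically from the dataset.
{
  "type": "historical",
  "predictionsStartDate": "2008-08-24T12:00:00Z",
  "predictionsEndDate": "2008-09-24T12:00:00Z",
  "relaxKnownInAdvanceFeaturesCheck": false
}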
BatchPredictionCreatedBy
{
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
Who created this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
fullName |
string,null |
true |
|
The full name of the user who created this job (if defined by the user) |
userId |
string |
true |
|
The User ID of the user who created this job |
username |
string |
true |
|
The username (e-mail address) of the user who created this job |
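For reference, a createdBy object in a job response could look like the following sketch; the user ID and e-mail address are placeholders, and fullName is null when the user has not provided one.
{
  "userId": "<user id>",
  "username": "user@example.com",
  "fullName": null
}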
BatchPredictionJobCSVSettings
{
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
}
The CSV settings used for this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
delimiter |
any |
true |
|
CSV fields are delimited by this character. Use the string "tab" to denote TSV (TAB separated values). |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
maxLength: 1 minLength: 1
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
encoding |
string |
true |
|
The encoding to be used for intake and output. For example (but not limited to): "shift_jis", "latin_1" or "mskanji". |
quotechar |
string |
true |
maxLength: 1 minLength: 1
|
Fields containing the delimiter or newlines must be quoted using this character. |
Enumerated Values
Property |
Value |
anonymous |
tab |
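For example, a csvSettings object for TAB-separated intake and output, assuming the default encoding and quote character, could be written as:
{
  "delimiter": "tab",
  "encoding": "utf-8",
  "quotechar": "\""
}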
BatchPredictionJobCreate
{
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The intake option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The output option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"skipDriftTracking"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
abortOnError |
boolean |
true |
|
Should this job abort if too many errors are encountered |
batchJobType |
string |
false |
|
Batch job type. |
chunkSize |
any |
false |
|
Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
maximum: 41943040 minimum: 20
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
columnNamesRemapping |
any |
false |
|
Remap (rename or remove columns from) the output from this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
object |
false |
|
Provide a dictionary with key/value pairs to remap (deprecated) |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[BatchPredictionJobRemapping] |
false |
maxItems: 1000
|
Provide a list of items to remap |
continued
Name |
Type |
Required |
Restrictions |
Description |
csvSettings |
BatchPredictionJobCSVSettings |
true |
|
The CSV settings used for this job |
deploymentId |
string |
false |
|
ID of deployment which is used in job for processing predictions dataset |
disableRowLevelErrorHandling |
boolean |
true |
|
Skip row by row error handling |
explanationAlgorithm |
string |
false |
|
Which algorithm will be used to calculate prediction explanations |
explanationClassNames |
[string] |
false |
maxItems: 10 minItems: 1
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1 |
explanationNumTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1 |
includePredictionStatus |
boolean |
true |
|
Include prediction status column in the output |
includeProbabilities |
boolean |
true |
|
Include probabilities for all classes |
includeProbabilitiesClasses |
[string] |
true |
maxItems: 100
|
Include only probabilities for these specific class names. |
intakeSettings |
any |
true |
|
The intake option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureIntake |
false |
|
Stream CSV data chunks from Azure |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryIntake |
false |
|
Stream CSV data chunks from Big Query using GCS |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DataStageIntake |
false |
|
Stream CSV data chunks from data stage storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
Catalog |
false |
|
Stream CSV data chunks from AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereIntake |
false |
|
Stream CSV data chunks from Datasphere using browser-datasphere |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DSS |
false |
|
Stream CSV data chunks from DSS dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
FileSystemIntake |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPIntake |
false |
|
Stream CSV data chunks from Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPIntake |
false |
|
Stream CSV data chunks from HTTP |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCIntake |
false |
|
Stream CSV data chunks from JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileIntake |
false |
|
Stream CSV data chunks from local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Intake |
false |
|
Stream CSV data chunks from Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeIntake |
false |
|
Stream CSV data chunks from Snowflake |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseIntake |
false |
|
Stream CSV data chunks from Azure Synapse |
continued
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
true |
maximum: 100 minimum: 0
|
Number of explanations requested. Will be ordered by strength. |
modelId |
string |
false |
|
ID of leaderboard model which is used in job for processing predictions dataset |
modelPackageId |
string |
false |
|
ID of model package from registry is used in job for processing predictions dataset |
monitoringBatchPrefix |
string,null |
false |
|
Name of the batch to create with this job |
numConcurrent |
integer |
false |
minimum: 1
|
Number of simultaneous requests to run against the prediction instance |
outputSettings |
any |
false |
|
The output option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureOutput |
false |
|
Save CSV data chunks to Azure Blob Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryOutput |
false |
|
Save CSV data chunks to Google BigQuery in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereOutput |
false |
|
Saves CSV data chunks to Datasphere using browser-datasphere |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
FileSystemOutput |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPOutput |
false |
|
Save CSV data chunks to Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPOutput |
false |
|
Save CSV data chunks to HTTP data endpoint |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCOutput |
false |
|
Save CSV data chunks via JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileOutput |
false |
|
Save CSV data chunks to local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Output |
false |
|
Saves CSV data chunks to Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeOutput |
false |
|
Save CSV data chunks to Snowflake in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseOutput |
false |
|
Save CSV data chunks to Azure Synapse in bulk |
continued
Name |
Type |
Required |
Restrictions |
Description |
passthroughColumns |
[string] |
false |
maxItems: 100
|
Pass through columns from the original dataset |
passthroughColumnsSet |
string |
false |
|
Pass through all columns from the original dataset |
pinnedModelId |
string |
false |
|
Specify a model ID used for scoring |
predictionInstance |
BatchPredictionJobPredictionInstance |
false |
|
Override the default prediction instance from the deployment when scoring this job. |
predictionThreshold |
number |
false |
maximum: 1 minimum: 0
|
Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0. |
predictionWarningEnabled |
boolean,null |
false |
|
Enable prediction warnings. |
secondaryDatasetsConfigId |
string |
false |
|
Configuration id for secondary datasets to use when making a prediction. |
skipDriftTracking |
boolean |
true |
|
Skip drift tracking for this job. |
thresholdHigh |
number |
false |
|
Compute explanations for predictions above this threshold |
thresholdLow |
number |
false |
|
Compute explanations for predictions below this threshold |
timeseriesSettings |
any |
false |
|
Time Series settings, included if this job is a Time Series job. |
oneOf
xor
xor
Enumerated Values
Property |
Value |
batchJobType |
[monitoring , prediction ] |
anonymous |
[auto , fixed , dynamic ] |
explanationAlgorithm |
[shap , xemp ] |
passthroughColumnsSet |
all |
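Putting the pieces together, one plausible BatchPredictionJobCreate request body is sketched below: it scores an AI catalog dataset against a deployment and writes results to local file storage. The deployment and dataset IDs are placeholders, the fields marked required above are spelled out at their defaults, and everything omitted (for example chunkSize and numConcurrent) falls back to its documented default.
{
  "deploymentId": "<deployment id>",
  "abortOnError": true,
  "disableRowLevelErrorHandling": false,
  "skipDriftTracking": false,
  "includePredictionStatus": false,
  "includeProbabilities": true,
  "includeProbabilitiesClasses": [],
  "maxExplanations": 0,
  "csvSettings": {
    "delimiter": ",",
    "encoding": "utf-8",
    "quotechar": "\""
  },
  "intakeSettings": {
    "type": "dataset",
    "datasetId": "<AI catalog dataset id>"
  },
  "outputSettings": {
    "type": "localFile"
  },
  "passthroughColumnsSet": "all"
}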
BatchPredictionJobDefinitionId
{
"properties": {
"jobDefinitionId": {
"description": "ID of the Batch Prediction job definition",
"type": "string"
}
},
"required": [
"jobDefinitionId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
jobDefinitionId |
string |
true |
|
ID of the Batch Prediction job definition |
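An object conforming to this schema contains only the definition ID, for example (placeholder value):
{
  "jobDefinitionId": "<job definition id>"
}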
BatchPredictionJobDefinitionJobSpecResponse
{
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
}
The Batch Prediction Job specification to be put on the queue at intervals
Properties
Name |
Type |
Required |
Restrictions |
Description |
abortOnError |
boolean |
true |
|
Should this job abort if too many errors are encountered |
batchJobType |
string |
false |
|
Batch job type. |
chunkSize |
any |
false |
|
Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
maximum: 41943040 minimum: 20
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
columnNamesRemapping |
any |
false |
|
Remap (rename or remove columns from) the output from this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
object |
false |
|
Provide a dictionary with key/value pairs to remap (deprecated) |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[BatchJobRemapping] |
false |
maxItems: 1000
|
Provide a list of items to remap |
continued
Name |
Type |
Required |
Restrictions |
Description |
csvSettings |
BatchJobCSVSettings |
true |
|
The CSV settings used for this job |
deploymentId |
string |
false |
|
ID of deployment which is used in job for processing predictions dataset |
disableRowLevelErrorHandling |
boolean |
true |
|
Skip row by row error handling |
explanationAlgorithm |
string |
false |
|
Which algorithm will be used to calculate prediction explanations |
explanationClassNames |
[string] |
false |
maxItems: 10 minItems: 1
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1 |
explanationNumTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1 |
includePredictionStatus |
boolean |
true |
|
Include prediction status column in the output |
includeProbabilities |
boolean |
true |
|
Include probabilities for all classes |
includeProbabilitiesClasses |
[string] |
true |
maxItems: 100
|
Include only probabilities for these specific class names. |
intakeSettings |
any |
true |
|
The intake option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureDataStreamer |
false |
|
Stream CSV data chunks from Azure |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DataStageDataStreamer |
false |
|
Stream CSV data chunks from data stage storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
CatalogDataStreamer |
false |
|
Stream CSV data chunks from AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPDataStreamer |
false |
|
Stream CSV data chunks from Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryDataStreamer |
false |
|
Stream CSV data chunks from Big Query using GCS |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3DataStreamer |
false |
|
Stream CSV data chunks from Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeDataStreamer |
false |
|
Stream CSV data chunks from Snowflake |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseDataStreamer |
false |
|
Stream CSV data chunks from Azure Synapse |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DSSDataStreamer |
false |
|
Stream CSV data chunks from DSS dataset |
xor
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPDataStreamer |
false |
|
Stream CSV data chunks from HTTP |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCDataStreamer |
false |
|
Stream CSV data chunks from JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileDataStreamer |
false |
|
Stream CSV data chunks from local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereDataStreamer |
false |
|
Stream CSV data chunks from Datasphere using browser-datasphere |
continued
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
true |
maximum: 100 minimum: 0
|
Number of explanations requested. Will be ordered by strength. |
maxNgramExplanations |
any |
false |
|
The maximum number of text ngram explanations to supply per row of the dataset. The default recommended maxNgramExplanations is all (no limit) |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
minimum: 0
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
modelId |
string |
false |
|
ID of leaderboard model which is used in job for processing predictions dataset |
modelPackageId |
string |
false |
|
ID of model package from registry which is used in job for processing predictions dataset |
monitoringAggregation |
MonitoringAggregation |
false |
|
Defines the aggregation policy for monitoring jobs. |
monitoringBatchPrefix |
string,null |
false |
|
Name of the batch to create with this job |
monitoringColumns |
MonitoringColumnsMapping |
false |
|
Column names mapping for monitoring |
monitoringOutputSettings |
MonitoringOutputSettings |
false |
|
Output settings for monitoring jobs |
numConcurrent |
integer |
true |
minimum: 0
|
Number of simultaneous requests to run against the prediction instance |
outputSettings |
any |
false |
|
The response option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureOutputAdaptor |
false |
|
Save CSV data chunks to Azure Blob Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPOutputAdaptor |
false |
|
Save CSV data chunks to Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryOutputAdaptor |
false |
|
Save CSV data chunks to Google BigQuery in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3OutputAdaptor |
false |
|
Saves CSV data chunks to Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeOutputAdaptor |
false |
|
Save CSV data chunks to Snowflake in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseOutputAdaptor |
false |
|
Save CSV data chunks to Azure Synapse in bulk |
xor
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HttpOutputAdaptor |
false |
|
Save CSV data chunks to HTTP data endpoint |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JdbcOutputAdaptor |
false |
|
Save CSV data chunks via JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileOutputAdaptor |
false |
|
Save CSV data chunks to local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereOutputAdatpor |
false |
|
Saves CSV data chunks to Datasphere using browser-datasphere |
continued
Name |
Type |
Required |
Restrictions |
Description |
passthroughColumns |
[string] |
false |
maxItems: 100
|
Pass through columns from the original dataset |
passthroughColumnsSet |
string |
false |
|
Pass through all columns from the original dataset |
pinnedModelId |
string |
false |
|
Specify a model ID used for scoring |
predictionInstance |
BatchJobPredictionInstance |
false |
|
Override the default prediction instance from the deployment when scoring this job. |
predictionWarningEnabled |
boolean,null |
false |
|
Enable prediction warnings. |
redactedFields |
[string] |
true |
|
A list of qualified field names from intake- and/or outputSettings that were redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId |
skipDriftTracking |
boolean |
true |
|
Skip drift tracking for this job. |
thresholdHigh |
number |
false |
|
Compute explanations for predictions above this threshold |
thresholdLow |
number |
false |
|
Compute explanations for predictions below this threshold |
timeseriesSettings |
any |
false |
|
Time Series settings, included if this job is a Time Series job. |
oneOf
xor
xor
Enumerated Values
Property |
Value |
batchJobType |
[monitoring , prediction ] |
anonymous |
[auto , fixed , dynamic ] |
explanationAlgorithm |
[shap , xemp ] |
anonymous |
all |
passthroughColumnsSet |
all |
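To make the properties above concrete, a filled-in job specification might look like the sketch below. It is illustrative only: the deployment and credential IDs and the S3 URLs are hypothetical placeholders, and only one of the many intake/output combinations documented above (S3 in, S3 out) is shown.
{
  "abortOnError": true,
  "deploymentId": "5dc5b1015e6e762a6241f9aa",
  "numConcurrent": 4,
  "maxExplanations": 0,
  "skipDriftTracking": false,
  "disableRowLevelErrorHandling": false,
  "includePredictionStatus": false,
  "includeProbabilities": true,
  "includeProbabilitiesClasses": [],
  "redactedFields": [],
  "passthroughColumnsSet": "all",
  "csvSettings": {
    "delimiter": ",",
    "encoding": "utf-8",
    "quotechar": "\""
  },
  "intakeSettings": {
    "type": "s3",
    "url": "s3://my-bucket/scoring/input.csv",
    "credentialId": "5dc5b1015e6e762a6241f9bb"
  },
  "outputSettings": {
    "type": "s3",
    "url": "s3://my-bucket/scoring/output.csv",
    "credentialId": "5dc5b1015e6e762a6241f9bb"
  }
}
Exactly one of the intakeSettings variants is selected via its type field, and likewise for outputSettings; the remaining fields of each variant follow the oneOf schemas listed above.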
BatchPredictionJobDefinitionResponse
{
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
}
The Batch Prediction Job Definition linking to this job, if any.
Properties
Name |
Type |
Required |
Restrictions |
Description |
createdBy |
string |
true |
|
The ID of creator of this job definition |
id |
string |
true |
|
The ID of the Batch Prediction job definition |
name |
string |
true |
|
A human-readable name for the definition, must be unique across organisations |
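For reference, a populated definition link object carries only the three fields above; a minimal sketch with hypothetical placeholder IDs:
{
  "createdBy": "5dc5b1015e6e762a6241f9cc",
  "id": "5dc5b1015e6e762a6241f9dd",
  "name": "Weekly scoring of new leads"
}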
BatchPredictionJobDefinitionsCreate
{
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"enabled": {
"description": "If this job definition is enabled as a scheduled job. Optional if no schedule is supplied.",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The intake option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations, if left out the backend will generate one for you.",
"maxLength": 100,
"minLength": 1,
"type": "string"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The output option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"deploymentId",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"skipDriftTracking"
],
"type": "object"
}
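The schema above lists every field a batch prediction job can carry, and most of them have defaults. Purely as orientation before the property table below, a minimal, hypothetical job payload that reads from a JDBC table and writes partitioned Parquet results to S3 might look like the following sketch; every ID, schema, table name, and URL here is a placeholder rather than a value taken from this document, and fields with defaults (abortOnError, csvSettings, and so on) are omitted for brevity:

{
  "deploymentId": "643f1c2e9b8a7d0012345678",
  "numConcurrent": 4,
  "maxExplanations": 3,
  "passthroughColumns": ["customer_id"],
  "intakeSettings": {
    "type": "jdbc",
    "dataStoreId": "643f1c2e9b8a7d0087654321",
    "credentialId": "643f1c2e9b8a7d00abcdef01",
    "schema": "analytics",
    "table": "scoring_input"
  },
  "outputSettings": {
    "type": "s3",
    "url": "s3://example-bucket/scored/",
    "format": "parquet",
    "partitionColumns": ["region"],
    "credentialId": "643f1c2e9b8a7d00abcdef02"
  }
}

Note the trailing slash on the output url: per the partitionColumns description above, partitioned Parquet output applies only when scoring to a directory.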
Properties
Name |
Type |
Required |
Restrictions |
Description |
abortOnError |
boolean |
true |
|
Should this job abort if too many errors are encountered |
batchJobType |
string |
false |
|
Batch job type. |
chunkSize |
any |
false |
|
Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
maximum: 41943040 minimum: 20
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
columnNamesRemapping |
any |
false |
|
Remap (rename or remove columns from) the output from this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
object |
false |
|
Provide a dictionary with key/value pairs to remap (deprecated) |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[BatchPredictionJobRemapping] |
false |
maxItems: 1000
|
Provide a list of items to remap |
continued
Name |
Type |
Required |
Restrictions |
Description |
csvSettings |
BatchPredictionJobCSVSettings |
true |
|
The CSV settings used for this job |
deploymentId |
string |
true |
|
ID of deployment which is used in job for processing predictions dataset |
disableRowLevelErrorHandling |
boolean |
true |
|
Skip row by row error handling |
enabled |
boolean |
false |
|
If this job definition is enabled as a scheduled job. Optional if no schedule is supplied. |
explanationAlgorithm |
string |
false |
|
Which algorithm will be used to calculate prediction explanations |
explanationClassNames |
[string] |
false |
maxItems: 10 minItems: 1
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither is specified, explanationNumTopClasses=1 is assumed. |
explanationNumTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither is specified, explanationNumTopClasses=1 is assumed. |
includePredictionStatus |
boolean |
true |
|
Include prediction status column in the output |
includeProbabilities |
boolean |
true |
|
Include probabilities for all classes |
includeProbabilitiesClasses |
[string] |
true |
maxItems: 100
|
Include only probabilities for these specific class names. |
intakeSettings |
any |
true |
|
The intake option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureIntake |
false |
|
Stream CSV data chunks from Azure |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryIntake |
false |
|
Stream CSV data chunks from Big Query using GCS |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DataStageIntake |
false |
|
Stream CSV data chunks from data stage storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
Catalog |
false |
|
Stream CSV data chunks from AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereIntake |
false |
|
Stream CSV data chunks from Datasphere using browser-datasphere |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DSS |
false |
|
Stream CSV data chunks from DSS dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
FileSystemIntake |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPIntake |
false |
|
Stream CSV data chunks from Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPIntake |
false |
|
Stream CSV data chunks from HTTP |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCIntake |
false |
|
Stream CSV data chunks from JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileIntake |
false |
|
Stream CSV data chunks from local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Intake |
false |
|
Stream CSV data chunks from Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeIntake |
false |
|
Stream CSV data chunks from Snowflake |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseIntake |
false |
|
Stream CSV data chunks from Azure Synapse |
continued
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
true |
maximum: 100 minimum: 0
|
Number of explanations requested. Will be ordered by strength. |
modelId |
string |
false |
|
ID of leaderboard model which is used in job for processing predictions dataset |
modelPackageId |
string |
false |
|
ID of model package from registry which is used in job for processing predictions dataset |
monitoringBatchPrefix |
string,null |
false |
|
Name of the batch to create with this job |
name |
string |
false |
maxLength: 100 minLength: 1
|
A human-readable name for the definition; it must be unique across organisations. If left out, the backend will generate one for you. |
numConcurrent |
integer |
false |
minimum: 1
|
Number of simultaneous requests to run against the prediction instance |
outputSettings |
any |
false |
|
The output option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureOutput |
false |
|
Save CSV data chunks to Azure Blob Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryOutput |
false |
|
Save CSV data chunks to Google BigQuery in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereOutput |
false |
|
Saves CSV data chunks to Datasphere using browser-datasphere |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
FileSystemOutput |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPOutput |
false |
|
Save CSV data chunks to Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPOutput |
false |
|
Save CSV data chunks to HTTP data endpoint |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCOutput |
false |
|
Save CSV data chunks via JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileOutput |
false |
|
Save CSV data chunks to local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Output |
false |
|
Saves CSV data chunks to Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeOutput |
false |
|
Save CSV data chunks to Snowflake in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseOutput |
false |
|
Save CSV data chunks to Azure Synapse in bulk |
continued
Name |
Type |
Required |
Restrictions |
Description |
passthroughColumns |
[string] |
false |
maxItems: 100
|
Pass through columns from the original dataset |
passthroughColumnsSet |
string |
false |
|
Pass through all columns from the original dataset |
pinnedModelId |
string |
false |
|
Specify a model ID used for scoring |
predictionInstance |
BatchPredictionJobPredictionInstance |
false |
|
Override the default prediction instance from the deployment when scoring this job. |
predictionThreshold |
number |
false |
maximum: 1 minimum: 0
|
Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0. |
predictionWarningEnabled |
boolean,null |
false |
|
Enable prediction warnings. |
schedule |
Schedule |
false |
|
The scheduling information defining how often and when to submit this job to the Job Scheduling service. Optional if enabled = False. |
secondaryDatasetsConfigId |
string |
false |
|
Configuration id for secondary datasets to use when making a prediction. |
skipDriftTracking |
boolean |
true |
|
Skip drift tracking for this job. |
thresholdHigh |
number |
false |
|
Compute explanations for predictions above this threshold |
thresholdLow |
number |
false |
|
Compute explanations for predictions below this threshold |
timeseriesSettings |
any |
false |
|
Time Series settings included if this job is a Time Series job. |
oneOf
xor
xor
Enumerated Values
Property |
Value |
batchJobType |
[monitoring , prediction ] |
anonymous |
[auto , fixed , dynamic ] |
explanationAlgorithm |
[shap , xemp ] |
passthroughColumnsSet |
all |
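The Schedule object referenced in the property table above uses the cron-like field structure shown in the response schema. As an illustrative sketch only (the values are hypothetical, not taken from this document), a job definition meant to run at minute 15 of hour 2 every Monday, in every month, would carry a schedule such as:

{
  "minute": [15],
  "hour": [2],
  "dayOfWeek": ["mon"],
  "dayOfMonth": ["*"],
  "month": ["*"]
}

Because dayOfMonth is ["*"] while dayOfWeek is restricted, the scheduler fires only on days matching dayOfWeek; listing explicit dates in dayOfMonth instead would add those dates as additional run days, as described in the dayOfMonth field above.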
BatchPredictionJobDefinitionsListResponse
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of scheduled jobs",
"items": {
"properties": {
"batchPredictionJob": {
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"enabled": {
"default": false,
"description": "If this job definition is enabled as a scheduled job.",
"type": "boolean"
},
"id": {
"description": "The ID of the Batch job definition",
"type": "string"
},
"lastFailedRunTime": {
"description": "Last time this job had a failed run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastScheduledRunTime": {
"description": "Last time this job was scheduled to run (though not guaranteed it actually ran at that time)",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastStartedJobStatus": {
"description": "The status of the latest job launched to the queue (if any).",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": [
"string",
"null"
]
},
"lastStartedJobTime": {
"description": "The last time (if any) a job was launched.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRunTime": {
"description": "Last time this job had a successful run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
},
"nextScheduledRunTime": {
"description": "Next time this job is scheduled to run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"updated": {
"description": "When was this job last updated",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
},
"required": [
"batchPredictionJob",
"created",
"createdBy",
"enabled",
"id",
"lastStartedJobStatus",
"lastStartedJobTime",
"name",
"updated",
"updatedBy"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object"
}
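The `next`, `previous`, and `totalCount` fields above make the listing pageable: each page carries a `data` array together with a `next` URL that is null on the last page. The following is a minimal client-side paging sketch only, assuming the Python `requests` library; the base URL and the Authorization header format are placeholders, not values taken from this reference.

import requests

LIST_URL = "https://app.example.com/api/v2/<listing-endpoint>/"  # placeholder: substitute the listing endpoint you are calling
API_TOKEN = "YOUR_API_TOKEN"                                     # placeholder credential

def iter_jobs():
    """Yield every record by following the `next` link of each page."""
    headers = {"Authorization": f"Bearer {API_TOKEN}"}  # header scheme is an assumption
    url = LIST_URL
    while url:
        resp = requests.get(url, headers=headers)
        resp.raise_for_status()
        page = resp.json()
        yield from page["data"]   # "data": the array of jobs on this page
        url = page["next"]        # null (None) when there is no next page

jobs = list(iter_jobs())
print(f"fetched {len(jobs)} jobs")  # should match the totalCount reported by the API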
Properties
Name |
Type |
Required |
Restrictions |
Description |
count |
integer |
false |
|
Number of items returned on this page. |
data |
[BatchPredictionJobDefinitionsResponse] |
true |
|
An array of scheduled jobs |
next |
string,null(uri) |
true |
|
URL pointing to the next page (if null, there is no next page). |
previous |
string,null(uri) |
true |
|
URL pointing to the previous page (if null, there is no previous page). |
totalCount |
integer |
true |
|
The total number of items across all pages. |
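The `schedule` object described in these schemas is built from five required arrays (`minute`, `hour`, `dayOfMonth`, `dayOfWeek`, and `month`), with `dayOfMonth` and `dayOfWeek` combined additively. As an illustration only (the enclosing job-definition payload is omitted), a schedule that fires every Tuesday at 15:30 could be written as in the following sketch:

import json

# Illustrative schedule block; all five keys are required by the schema.
# With dayOfMonth set to ["*"] and dayOfWeek given, the scheduler triggers on every
# day of the month that matches dayOfWeek -- i.e. every Tuesday at 15:30.
schedule = {
    "minute": [30],
    "hour": [15],
    "dayOfMonth": ["*"],
    "dayOfWeek": ["tue"],  # 3-letter abbreviations and full day names are also accepted
    "month": ["*"],
}

print(json.dumps(schedule, indent=2))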
BatchPredictionJobDefinitionsResponse
{
"properties": {
"batchPredictionJob": {
"description": "The Batch Prediction Job specification to be put on the queue in intervals",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.30"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.30"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.30"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"maxNgramExplanations": {
"description": "The maximum number of text ngram explanations to supply per row of the dataset. The default recommended `maxNgramExplanations` is `all` (no limit)",
"oneOf": [
{
"minimum": 0,
"type": "integer"
},
{
"enum": [
"all"
],
"type": "string"
}
],
"x-versionadded": "v2.30"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.30"
},
"monitoringAggregation": {
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"monitoringColumns": {
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
},
"monitoringOutputSettings": {
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 0,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"numConcurrent",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"enabled": {
"default": false,
"description": "If this job definition is enabled as a scheduled job.",
"type": "boolean"
},
"id": {
"description": "The ID of the Batch job definition",
"type": "string"
},
"lastFailedRunTime": {
"description": "Last time this job had a failed run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastScheduledRunTime": {
"description": "Last time this job was scheduled to run (though not guaranteed it actually ran at that time)",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastStartedJobStatus": {
"description": "The status of the latest job launched to the queue (if any).",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": [
"string",
"null"
]
},
"lastStartedJobTime": {
"description": "The last time (if any) a job was launched.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRunTime": {
"description": "Last time this job had a successful run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
},
"nextScheduledRunTime": {
"description": "Next time this job is scheduled to run",
"format": "date-time",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"updated": {
"description": "When was this job last updated",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
}
},
"required": [
"batchPredictionJob",
"created",
"createdBy",
"enabled",
"id",
"lastStartedJobStatus",
"lastStartedJobTime",
"name",
"updated",
"updatedBy"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
batchPredictionJob |
BatchPredictionJobDefinitionJobSpecResponse |
true |
|
The Batch Prediction Job specification to be put on the queue in intervals |
created |
string(date-time) |
true |
|
When was this job created |
createdBy |
BatchJobCreatedBy |
true |
|
Who created this job |
enabled |
boolean |
true |
|
If this job definition is enabled as a scheduled job. |
id |
string |
true |
|
The ID of the Batch job definition |
lastFailedRunTime |
string,null(date-time) |
false |
|
Last time this job had a failed run |
lastScheduledRunTime |
string,null(date-time) |
false |
|
Last time this job was scheduled to run (though not guaranteed it actually ran at that time) |
lastStartedJobStatus |
string,null |
true |
|
The status of the latest job launched to the queue (if any). |
lastStartedJobTime |
string,null(date-time) |
true |
|
The last time (if any) a job was launched. |
lastSuccessfulRunTime |
string,null(date-time) |
false |
|
Last time this job had a successful run |
name |
string |
true |
|
A human-readable name for the definition, must be unique across organisations |
nextScheduledRunTime |
string,null(date-time) |
false |
|
Next time this job is scheduled to run |
schedule |
Schedule |
false |
|
The scheduling information defining how often and when the Job Scheduling service executes this job. Optional if enabled = False. |
updated |
string(date-time) |
true |
|
When was this job last updated |
updatedBy |
BatchJobCreatedBy |
true |
|
Who last updated this job |
Enumerated Values
Property |
Value |
lastStartedJobStatus |
[INITIALIZING , RUNNING , COMPLETED , ABORTED , FAILED ] |
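The BatchPredictionJobDefinitionsUpdate schema that follows reuses the same building blocks documented above: one intake variant, one output variant, and a schedule object whose five fields (dayOfMonth, dayOfWeek, month, hour, minute) accept either explicit values or the "*" wildcard. As an illustration only, the Python sketch below assembles such a payload and sends it with the requests library; the host, token, authorization header format, IDs, S3 URL, and the PATCH route are hypothetical placeholders and assumptions rather than values taken from this reference, so confirm the actual job-definition route against the endpoint sections of this documentation.

import requests

API_HOST = "https://app.datarobot.com"          # assumed host; replace with your instance
API_TOKEN = "YOUR_API_TOKEN"                    # hypothetical API token
DEFINITION_ID = "0123456789abcdef01234567"      # hypothetical job definition ID

payload = {
    "enabled": True,
    "name": "nightly-scoring",                        # must be unique across organisations
    "deploymentId": "0123456789abcdef01234568",       # hypothetical deployment ID
    "abortOnError": True,
    # Intake: one of the "oneOf" variants from the schema -- here, CSV streamed from S3.
    "intakeSettings": {
        "type": "s3",
        "url": "s3://example-bucket/scoring/input.csv",  # hypothetical URL
        "credentialId": "0123456789abcdef01234569",      # hypothetical credential ID
        "format": "csv",
    },
    # Output: the simplest variant, keeping results as a downloadable local file.
    "outputSettings": {"type": "localFile"},
    # Schedule: wildcards for the date fields plus explicit hour/minute,
    # i.e. run every day at 02:30; all five keys are required by the schema.
    "schedule": {
        "dayOfMonth": ["*"],
        "dayOfWeek": ["*"],
        "month": ["*"],
        "hour": [2],
        "minute": [30],
    },
}

# Assumed update route -- verify against the batch prediction job definition
# endpoints documented elsewhere in this reference before relying on it.
response = requests.patch(
    f"{API_HOST}/api/v2/batchPredictionJobDefinitions/{DEFINITION_ID}/",
    json=payload,
    headers={"Authorization": f"Bearer {API_TOKEN}"},  # auth header format assumed
)
response.raise_for_status()
print(response.json().get("nextScheduledRunTime"))

If the call succeeds, fields from the response schema above such as nextScheduledRunTime and lastStartedJobStatus indicate when the definition will next run and how its latest queued job finished.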
BatchPredictionJobDefinitionsUpdate
{
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"enabled": {
"description": "If this job definition is enabled as a scheduled job. Optional if no schedule is supplied.",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The intake option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations, if left out the backend will generate one for you.",
"maxLength": 100,
"minLength": 1,
"type": "string"
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The output option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"type": "object"
}
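As a sketch of the job-run-time-based forecast point policy described above, the following timeseriesSettings value would shift the forecast point one day back from the scheduled run time; the offset shown is a hypothetical ISO-8601 duration.
{
  "type": "forecast",
  "forecastPointPolicy": {
    "type": "jobRunTimeBased",
    "configuration": {
      "offset": "-P1D"
    }
  }
}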
Properties
Name |
Type |
Required |
Restrictions |
Description |
abortOnError |
boolean |
false |
|
Should this job abort if too many errors are encountered |
batchJobType |
string |
false |
|
Batch job type. |
chunkSize |
any |
false |
|
Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
maximum: 41943040 minimum: 20
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
columnNamesRemapping |
any |
false |
|
Remap (rename or remove columns from) the output from this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
object |
false |
|
Provide a dictionary with key/value pairs to remap (deprecated) |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[BatchPredictionJobRemapping] |
false |
maxItems: 1000
|
Provide a list of items to remap |
continued
Name |
Type |
Required |
Restrictions |
Description |
csvSettings |
BatchPredictionJobCSVSettings |
false |
|
The CSV settings used for this job |
deploymentId |
string |
false |
|
ID of deployment which is used in job for processing predictions dataset |
disableRowLevelErrorHandling |
boolean |
false |
|
Skip row by row error handling |
enabled |
boolean |
false |
|
If this job definition is enabled as a scheduled job. Optional if no schedule is supplied. |
explanationAlgorithm |
string |
false |
|
Which algorithm will be used to calculate prediction explanations |
explanationClassNames |
[string] |
false |
maxItems: 10 minItems: 1
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither is specified, explanationNumTopClasses=1 is assumed. |
explanationNumTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither is specified, explanationNumTopClasses=1 is assumed. |
includePredictionStatus |
boolean |
false |
|
Include prediction status column in the output |
includeProbabilities |
boolean |
false |
|
Include probabilities for all classes |
includeProbabilitiesClasses |
[string] |
false |
maxItems: 100
|
Include only probabilities for these specific class names. |
intakeSettings |
any |
false |
|
The intake option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureIntake |
false |
|
Stream CSV data chunks from Azure |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryIntake |
false |
|
Stream CSV data chunks from Big Query using GCS |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DataStageIntake |
false |
|
Stream CSV data chunks from data stage storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
Catalog |
false |
|
Stream CSV data chunks from AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereIntake |
false |
|
Stream CSV data chunks from Datasphere using browser-datasphere |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DSS |
false |
|
Stream CSV data chunks from DSS dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
FileSystemIntake |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPIntake |
false |
|
Stream CSV data chunks from Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPIntake |
false |
|
Stream CSV data chunks from HTTP |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCIntake |
false |
|
Stream CSV data chunks from JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileIntake |
false |
|
Stream CSV data chunks from local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Intake |
false |
|
Stream CSV data chunks from Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeIntake |
false |
|
Stream CSV data chunks from Snowflake |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseIntake |
false |
|
Stream CSV data chunks from Azure Synapse |
continued
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
false |
maximum: 100 minimum: 0
|
Number of explanations requested. Will be ordered by strength. |
modelId |
string |
false |
|
ID of leaderboard model which is used in job for processing predictions dataset |
modelPackageId |
string |
false |
|
ID of model package from registry is used in job for processing predictions dataset |
monitoringBatchPrefix |
string,null |
false |
|
Name of the batch to create with this job |
name |
string |
false |
maxLength: 100 minLength: 1
|
A human-readable name for the definition, must be unique across organisations, if left out the backend will generate one for you. |
numConcurrent |
integer |
false |
minimum: 1
|
Number of simultaneous requests to run against the prediction instance |
outputSettings |
any |
false |
|
The output option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureOutput |
false |
|
Save CSV data chunks to Azure Blob Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryOutput |
false |
|
Save CSV data chunks to Google BigQuery in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereOutput |
false |
|
Saves CSV data chunks to Datasphere using browser-datasphere |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
FileSystemOutput |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPOutput |
false |
|
Save CSV data chunks to Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPOutput |
false |
|
Save CSV data chunks to HTTP data endpoint |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCOutput |
false |
|
Save CSV data chunks via JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileOutput |
false |
|
Save CSV data chunks to local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Output |
false |
|
Saves CSV data chunks to Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeOutput |
false |
|
Save CSV data chunks to Snowflake in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseOutput |
false |
|
Save CSV data chunks to Azure Synapse in bulk |
continued
Name |
Type |
Required |
Restrictions |
Description |
passthroughColumns |
[string] |
false |
maxItems: 100
|
Pass through columns from the original dataset |
passthroughColumnsSet |
string |
false |
|
Pass through all columns from the original dataset |
pinnedModelId |
string |
false |
|
Specify a model ID used for scoring |
predictionInstance |
BatchPredictionJobPredictionInstance |
false |
|
Override the default prediction instance from the deployment when scoring this job. |
predictionThreshold |
number |
false |
maximum: 1 minimum: 0
|
Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0. |
predictionWarningEnabled |
boolean,null |
false |
|
Enable prediction warnings. |
schedule |
Schedule |
false |
|
The scheduling information defining how often and when to execute this job via the Job Scheduling service. Optional if enabled = False. |
secondaryDatasetsConfigId |
string |
false |
|
Configuration id for secondary datasets to use when making a prediction. |
skipDriftTracking |
boolean |
false |
|
Skip drift tracking for this job. |
thresholdHigh |
number |
false |
|
Compute explanations for predictions above this threshold |
thresholdLow |
number |
false |
|
Compute explanations for predictions below this threshold |
timeseriesSettings |
any |
false |
|
Time Series settings, included if this job is a Time Series job. |
oneOf
xor
xor
Enumerated Values
Property |
Value |
batchJobType |
[monitoring , prediction ] |
anonymous |
[auto , fixed , dynamic ] |
explanationAlgorithm |
[shap , xemp ] |
passthroughColumnsSet |
all |
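For reference, below is a minimal sketch of a job specification that conforms to the schema above, assuming an AI Catalog intake, a JDBC output, and a weekday schedule; all IDs and the table name are hypothetical placeholders.
{
  "deploymentId": "5dc1a031fc9f18123445abcd",
  "numConcurrent": 4,
  "chunkSize": "auto",
  "passthroughColumnsSet": "all",
  "intakeSettings": {
    "type": "dataset",
    "datasetId": "60b3d1aefc9f18123445ef01"
  },
  "outputSettings": {
    "type": "jdbc",
    "dataStoreId": "60b3d2c2fc9f18123445ef02",
    "statementType": "insert",
    "table": "scored_records",
    "createTableIfNotExists": true
  },
  "enabled": true,
  "schedule": {
    "minute": [0],
    "hour": [6],
    "dayOfMonth": ["*"],
    "dayOfWeek": ["mon", "tue", "wed", "thu", "fri"],
    "month": ["*"]
  }
}
Because dayOfMonth is ["*"] while dayOfWeek is restricted, the scheduler triggers only on the listed weekdays, at 06:00; the insert statement type is paired with createTableIfNotExists rather than the discouraged create_table.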
BatchPredictionJobId
{
"properties": {
"partNumber": {
"default": 0,
"description": "The number of which csv part is being uploaded when using multipart upload ",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.27"
},
"predictionJobId": {
"description": "ID of the Batch Prediction job",
"type": "string"
}
},
"required": [
"partNumber",
"predictionJobId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
partNumber |
integer |
true |
minimum: 0
|
The number of the CSV part being uploaded when using multipart upload |
predictionJobId |
string |
true |
|
ID of the Batch Prediction job |
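As an illustration, an instance of this object for the second chunk of a multipart upload might look as follows, assuming zero-based part numbering (the documented default is 0); the job ID is a placeholder.
{
  "partNumber": 1,
  "predictionJobId": "60c7a1b2fc9f18123445ff99"
}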
BatchPredictionJobLinks
{
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
}
Links useful for this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
csvUpload |
string(url) |
false |
|
The URL used to upload the dataset for this job. Only available for localFile intake. |
download |
string,null |
false |
|
The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available. |
self |
string(url) |
true |
|
The URL used to access this job. |
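For example, a links object for a job using localFile intake and output might resemble the following sketch; the host, paths, and job ID are illustrative placeholders, and download stays null until the results are ready.
{
  "self": "https://app.datarobot.com/api/v2/batchPredictions/60c7a1b2fc9f18123445ff99/",
  "csvUpload": "https://app.datarobot.com/api/v2/batchPredictions/60c7a1b2fc9f18123445ff99/csvUpload/",
  "download": null
}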
BatchPredictionJobListResponse
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of jobs",
"items": {
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object"
}
Properties
| Name | Type | Required | Restrictions | Description |
|------|------|----------|--------------|-------------|
| count | integer | false | | Number of items returned on this page. |
| data | [BatchPredictionJobResponse] | true | | An array of jobs |
| next | string,null(uri) | true | | URL pointing to the next page (if null, there is no next page). |
| previous | string,null(uri) | true | | URL pointing to the previous page (if null, there is no previous page). |
| totalCount | integer | true | | The total number of items across all pages. |
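To make the pagination contract concrete, below is a minimal Python sketch (using the requests library) that collects jobs from every page by following the next link until it is null. The host, API token, and authorization header format are illustrative assumptions; only the data, next, offset, and limit fields/parameters documented on this page are relied on.

import requests

BASE_URL = "https://app.datarobot.com"              # assumption: your DataRobot host
API_TOKEN = "YOUR_API_TOKEN"                        # assumption: a valid API token
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}  # assumption: bearer-style auth header

def list_all_batch_jobs():
    """Walk every page of GET /api/v2/batchJobs/ by following the `next` link."""
    url = f"{BASE_URL}/api/v2/batchJobs/?offset=0&limit=100"
    jobs = []
    while url is not None:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        page = response.json()
        jobs.extend(page["data"])   # `data` holds the jobs returned on this page
        url = page["next"]          # null (None) when there is no next page
    return jobs

all_jobs = list_all_batch_jobs()
print(f"Fetched {len(all_jobs)} jobs")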
BatchPredictionJobPredictionInstance
{
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
}
Override the default prediction instance from the deployment when scoring this job.
Properties
| Name | Type | Required | Restrictions | Description |
|------|------|----------|--------------|-------------|
| apiKey | string | false | | By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users. |
| datarobotKey | string | false | | If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key. |
| hostName | string | true | | Override the default host name of the deployment with this. |
| sslEnabled | boolean | true | | Use SSL (HTTPS) when communicating with the overridden prediction server. |
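As a hedged illustration of how this object could be supplied inside a Batch Prediction job payload, the Python fragment below builds a predictionInstance override. The deployment ID, host name, and key values are made-up placeholders, not values from any real environment.

# Illustrative job-spec fragment only; all IDs, hosts, and keys below are placeholders.
job_spec_fragment = {
    "deploymentId": "5dc1a031bc0a4d6a7c9e8f00",       # hypothetical deployment ID
    "predictionInstance": {
        "hostName": "prediction-01.example.com",      # required: overrides the deployment's host
        "sslEnabled": True,                           # required: defaults to true (HTTPS)
        "datarobotKey": "0000-1111-2222-3333",        # only needed for Managed AI Cloud instances
        "apiKey": "ANOTHER_USERS_API_KEY",            # optional: score on behalf of another user
    },
}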
BatchPredictionJobRemapping
{
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
}
Properties
| Name | Type | Required | Restrictions | Description |
|------|------|----------|--------------|-------------|
| inputName | string | true | | Rename column with this name |
| outputName | string,null | true | | Rename column to this name (leave as null to remove from the output) |
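For illustration, the sketch below shows both accepted shapes of columnNamesRemapping described on this page: the preferred list form and the deprecated dictionary form. The column names are invented placeholders.

# Preferred list form: rename one column and drop another from the output.
column_names_remapping = [
    {"inputName": "readmission_probability", "outputName": "score"},  # rename in the output
    {"inputName": "row_id", "outputName": None},                      # null/None removes the column
]

# Deprecated dictionary form expressing the same rename.
column_names_remapping_legacy = {"readmission_probability": "score"}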
BatchPredictionJobResponse
{
"properties": {
"batchPredictionJobDefinition": {
"description": "The Batch Prediction Job Definition linking to this job, if any.",
"properties": {
"createdBy": {
"description": "The ID of creator of this job definition",
"type": "string"
},
"id": {
"description": "The ID of the Batch Prediction job definition",
"type": "string"
},
"name": {
"description": "A human-readable name for the definition, must be unique across organisations",
"type": "string"
}
},
"required": [
"createdBy",
"id",
"name"
],
"type": "object"
},
"created": {
"description": "When was this job created",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.21"
},
"createdBy": {
"description": "Who created this job",
"properties": {
"fullName": {
"description": "The full name of the user who created this job (if defined by the user)",
"type": [
"string",
"null"
]
},
"userId": {
"description": "The User ID of the user who created this job",
"type": "string"
},
"username": {
"description": "The username (e-mail address) of the user who created this job",
"type": "string"
}
},
"required": [
"fullName",
"userId",
"username"
],
"type": "object"
},
"elapsedTimeSec": {
"description": "Number of seconds the job has been processing for",
"minimum": 0,
"type": "integer"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"minimum": 0,
"type": "integer"
},
"hidden": {
"description": "When was this job was hidden last, blank if visible",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.24"
},
"id": {
"description": "The ID of the Batch Prediction job",
"type": "string",
"x-versionadded": "v2.21"
},
"intakeDatasetDisplayName": {
"description": "If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.23"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"jobSpec": {
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
},
"links": {
"description": "Links useful for this job",
"properties": {
"csvUpload": {
"description": "The URL used to upload the dataset for this job. Only available for localFile intake.",
"format": "url",
"type": "string"
},
"download": {
"description": "The URL used to download the results from this job. Only available for localFile outputs. Will be null if the download is not yet available.",
"type": [
"string",
"null"
]
},
"self": {
"description": "The URL used access this job.",
"format": "url",
"type": "string"
}
},
"required": [
"self"
],
"type": "object"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array"
},
"monitoringBatchId": {
"description": "Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.33"
},
"percentageCompleted": {
"description": "Indicates job progress which is based on number of already processed rows in dataset",
"maximum": 100,
"minimum": 0,
"type": "number"
},
"queuePosition": {
"description": "To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job start running. May not be available in all environments.",
"minimum": 0,
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"queued": {
"description": "The job has been put on the queue for execution.",
"type": "boolean",
"x-versionadded": "v2.26"
},
"resultsDeleted": {
"description": "Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage)",
"type": "boolean",
"x-versionadded": "v2.24"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"minimum": 0,
"type": "integer"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.20"
},
"source": {
"description": "Source from which batch job was started",
"type": "string",
"x-versionadded": "v2.24"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string"
},
"statusDetails": {
"description": "Explanation for current status",
"type": "string"
}
},
"required": [
"created",
"createdBy",
"elapsedTimeSec",
"failedRows",
"id",
"jobIntakeSize",
"jobOutputSize",
"jobSpec",
"links",
"logs",
"monitoringBatchId",
"percentageCompleted",
"queued",
"scoredRows",
"skippedRows",
"status",
"statusDetails"
],
"type": "object"
}
Properties
| Name | Type | Required | Restrictions | Description |
|------|------|----------|--------------|-------------|
| batchPredictionJobDefinition | BatchPredictionJobDefinitionResponse | false | | The Batch Prediction Job Definition linking to this job, if any. |
| created | string(date-time) | true | | When was this job created |
| createdBy | BatchPredictionCreatedBy | true | | Who created this job |
| elapsedTimeSec | integer | true | minimum: 0 | Number of seconds the job has been processing for |
| failedRows | integer | true | minimum: 0 | Number of rows that have failed scoring |
| hidden | string(date-time) | false | | When this job was last hidden; blank if visible |
| id | string | true | | The ID of the Batch Prediction job |
| intakeDatasetDisplayName | string,null | false | | If applicable (e.g. for AI catalog), will contain the dataset name used for the intake dataset. |
| jobIntakeSize | integer,null | true | minimum: 0 | Number of bytes in the intake dataset for this job |
| jobOutputSize | integer,null | true | minimum: 0 | Number of bytes in the output dataset for this job |
| jobSpec | BatchPredictionJobSpecResponse | true | | The job configuration used to create this job |
| links | BatchPredictionJobLinks | true | | Links useful for this job |
| logs | [string] | true | | The job log. |
| monitoringBatchId | string,null | true | | Id of the monitoring batch created by this job. Only present if the job runs on a deployment with batch monitoring enabled. |
| percentageCompleted | number | true | maximum: 100, minimum: 0 | Indicates job progress which is based on number of already processed rows in dataset |
| queuePosition | integer,null | false | minimum: 0 | To ensure a dedicated prediction instance is not overloaded, only one job will be run against it at a time. This is the number of jobs that are awaiting processing before this job starts running. May not be available in all environments. |
| queued | boolean | true | | The job has been put on the queue for execution. |
| resultsDeleted | boolean | false | | Indicates if the job was subject to garbage collection and had its artifacts deleted (output files, if any, and scoring data on local storage) |
| scoredRows | integer | true | minimum: 0 | Number of rows that have been used in prediction computation |
| skippedRows | integer | true | minimum: 0 | Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows. |
| source | string | false | | Source from which batch job was started |
| status | string | true | | The current job status |
| statusDetails | string | true | | Explanation for current status |
Enumerated Values
| Property | Value |
|----------|-------|
| status | [INITIALIZING, RUNNING, COMPLETED, ABORTED, FAILED] |
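Combining the status enumeration with the links object, the Python sketch below polls a job until it reaches a terminal status and then downloads the results when a localFile output is used. The job URL, API token, authorization header format, and output filename are assumptions; only the status, percentageCompleted, and links.download fields documented above are read.

import time
import requests

API_TOKEN = "YOUR_API_TOKEN"                                      # assumption: a valid API token
JOB_URL = "https://app.datarobot.com/api/v2/batchJobs/JOB_ID/"    # assumption: the job's links.self URL
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"}                # assumption: bearer-style auth header

TERMINAL_STATUSES = {"COMPLETED", "ABORTED", "FAILED"}            # terminal values of the status enum

def wait_for_job(url):
    """Poll the job until it leaves INITIALIZING/RUNNING, printing progress along the way."""
    while True:
        job = requests.get(url, headers=HEADERS).json()
        print(f"{job['status']}: {job['percentageCompleted']:.0f}% complete")
        if job["status"] in TERMINAL_STATUSES:
            return job
        time.sleep(15)

job = wait_for_job(JOB_URL)
download_url = job["links"].get("download")                       # only present for localFile output
if job["status"] == "COMPLETED" and download_url:
    with open("predictions.csv", "wb") as out:
        out.write(requests.get(download_url, headers=HEADERS).content)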
BatchPredictionJobSpecResponse
{
"description": "The job configuration used to create this job",
"properties": {
"abortOnError": {
"default": true,
"description": "Should this job abort if too many errors are encountered",
"type": "boolean"
},
"batchJobType": {
"default": "prediction",
"description": "Batch job type.",
"enum": [
"monitoring",
"prediction"
],
"type": "string",
"x-versionadded": "v2.35"
},
"chunkSize": {
"default": "auto",
"description": "Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes.",
"oneOf": [
{
"enum": [
"auto",
"fixed",
"dynamic"
],
"type": "string"
},
{
"maximum": 41943040,
"minimum": 20,
"type": "integer"
}
]
},
"columnNamesRemapping": {
"description": "Remap (rename or remove columns from) the output from this job",
"oneOf": [
{
"description": "Provide a dictionary with key/value pairs to remap (deprecated)",
"type": "object"
},
{
"description": "Provide a list of items to remap",
"items": {
"properties": {
"inputName": {
"description": "Rename column with this name",
"type": "string"
},
"outputName": {
"description": "Rename column to this name (leave as null to remove from the output)",
"type": [
"string",
"null"
]
}
},
"required": [
"inputName",
"outputName"
],
"type": "object"
},
"maxItems": 1000,
"type": "array"
}
]
},
"csvSettings": {
"description": "The CSV settings used for this job",
"properties": {
"delimiter": {
"default": ",",
"description": "CSV fields are delimited by this character. Use the string \"tab\" to denote TSV (TAB separated values).",
"oneOf": [
{
"enum": [
"tab"
],
"type": "string"
},
{
"maxLength": 1,
"minLength": 1,
"type": "string"
}
]
},
"encoding": {
"default": "utf-8",
"description": "The encoding to be used for intake and output. For example (but not limited to): \"shift_jis\", \"latin_1\" or \"mskanji\".",
"type": "string"
},
"quotechar": {
"default": "\"",
"description": "Fields containing the delimiter or newlines must be quoted using this character.",
"maxLength": 1,
"minLength": 1,
"type": "string"
}
},
"required": [
"delimiter",
"encoding",
"quotechar"
],
"type": "object"
},
"deploymentId": {
"description": "ID of deployment which is used in job for processing predictions dataset",
"type": "string"
},
"disableRowLevelErrorHandling": {
"default": false,
"description": "Skip row by row error handling",
"type": "boolean"
},
"explanationAlgorithm": {
"description": "Which algorithm will be used to calculate prediction explanations",
"enum": [
"shap",
"xemp"
],
"type": "string"
},
"explanationClassNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1",
"items": {
"description": "Class name to explain",
"type": "string"
},
"maxItems": 10,
"minItems": 1,
"type": "array",
"x-versionadded": "v2.29"
},
"explanationNumTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"includePredictionStatus": {
"default": false,
"description": "Include prediction status column in the output",
"type": "boolean"
},
"includeProbabilities": {
"default": true,
"description": "Include probabilities for all classes",
"type": "boolean"
},
"includeProbabilitiesClasses": {
"default": [],
"description": "Include only probabilities for these specific class names.",
"items": {
"description": "Include probability for this class name",
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"intakeSettings": {
"default": {
"type": "localFile"
},
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Stream CSV data chunks from Azure",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"maxExplanations": {
"default": 0,
"description": "Number of explanations requested. Will be ordered by strength.",
"maximum": 100,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "ID of leaderboard model which is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"modelPackageId": {
"description": "ID of model package from registry is used in job for processing predictions dataset",
"type": "string",
"x-versionadded": "v2.28"
},
"monitoringBatchPrefix": {
"description": "Name of the batch to create with this job",
"type": [
"string",
"null"
]
},
"numConcurrent": {
"description": "Number of simultaneous requests to run against the prediction instance",
"minimum": 1,
"type": "integer"
},
"outputSettings": {
"description": "The response option configured for this job",
"oneOf": [
{
"description": "Save CSV data chunks to Azure Blob Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"azure"
],
"type": "string"
},
"url": {
"description": "URL for the file or directory",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
},
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
},
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
]
},
"passthroughColumns": {
"description": "Pass through columns from the original dataset",
"items": {
"description": "A column name from the original dataset to pass through to the resulting predictions",
"maxLength": 50,
"minLength": 1,
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"passthroughColumnsSet": {
"description": "Pass through all columns from the original dataset",
"enum": [
"all"
],
"type": "string"
},
"pinnedModelId": {
"description": "Specify a model ID used for scoring",
"type": "string"
},
"predictionInstance": {
"description": "Override the default prediction instance from the deployment when scoring this job.",
"properties": {
"apiKey": {
"description": "By default, prediction requests will use the API key of the user that created the job. This allows you to make requests on behalf of other users.",
"type": "string"
},
"datarobotKey": {
"description": "If running a job against a prediction instance in the Managed AI Cloud, you must provide the organization level DataRobot-Key.",
"type": "string"
},
"hostName": {
"description": "Override the default host name of the deployment with this.",
"type": "string"
},
"sslEnabled": {
"default": true,
"description": "Use SSL (HTTPS) when communicating with the overriden prediction server.",
"type": "boolean"
}
},
"required": [
"hostName",
"sslEnabled"
],
"type": "object"
},
"predictionThreshold": {
"description": "Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionWarningEnabled": {
"description": "Enable prediction warnings.",
"type": [
"boolean",
"null"
]
},
"redactedFields": {
"description": "A list of qualified field names from intake- and/or outputSettings that was redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId",
"items": {
"description": "Field names that are potentially redacted",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.30"
},
"secondaryDatasetsConfigId": {
"description": "Configuration id for secondary datasets to use when making a prediction.",
"type": "string",
"x-versionadded": "v2.33"
},
"skipDriftTracking": {
"default": false,
"description": "Skip drift tracking for this job.",
"type": "boolean"
},
"thresholdHigh": {
"description": "Compute explanations for predictions above this threshold",
"type": "number"
},
"thresholdLow": {
"description": "Compute explanations for predictions below this threshold",
"type": "number"
},
"timeseriesSettings": {
"description": "Time Series settings included of this job is a Time Series job.",
"oneOf": [
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
],
"x-versionadded": "v2.20"
}
},
"required": [
"abortOnError",
"csvSettings",
"disableRowLevelErrorHandling",
"includePredictionStatus",
"includeProbabilities",
"includeProbabilitiesClasses",
"intakeSettings",
"maxExplanations",
"redactedFields",
"skipDriftTracking"
],
"type": "object"
}
The job configuration used to create this job
Properties
Name |
Type |
Required |
Restrictions |
Description |
abortOnError |
boolean |
true |
|
Should this job abort if too many errors are encountered |
batchJobType |
string |
false |
|
Batch job type. |
chunkSize |
any |
false |
|
Which strategy should be used to determine the chunk size. Can be either a named strategy or a fixed size in bytes. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
maximum: 41943040 minimum: 20
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
columnNamesRemapping |
any |
false |
|
Remap (rename or remove columns from) the output from this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
object |
false |
|
Provide a dictionary with key/value pairs to remap (deprecated) |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[BatchPredictionJobRemapping] |
false |
maxItems: 1000
|
Provide a list of items to remap |
continued
Name |
Type |
Required |
Restrictions |
Description |
csvSettings |
BatchPredictionJobCSVSettings |
true |
|
The CSV settings used for this job |
deploymentId |
string |
false |
|
ID of deployment which is used in job for processing predictions dataset |
disableRowLevelErrorHandling |
boolean |
true |
|
Skip row by row error handling |
explanationAlgorithm |
string |
false |
|
Which algorithm will be used to calculate prediction explanations |
explanationClassNames |
[string] |
false |
maxItems: 10 minItems: 1
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with explanationNumTopClasses. If neither specified - we assume explanationNumTopClasses=1 |
explanationNumTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with explanationClassNames. If neither specified - we assume explanationNumTopClasses=1 |
includePredictionStatus |
boolean |
true |
|
Include prediction status column in the output |
includeProbabilities |
boolean |
true |
|
Include probabilities for all classes |
includeProbabilitiesClasses |
[string] |
true |
maxItems: 100
|
Include only probabilities for these specific class names. |
intakeSettings |
any |
true |
|
The response option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureDataStreamer |
false |
|
Stream CSV data chunks from Azure |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DataStageDataStreamer |
false |
|
Stream CSV data chunks from data stage storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
CatalogDataStreamer |
false |
|
Stream CSV data chunks from AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPDataStreamer |
false |
|
Stream CSV data chunks from Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryDataStreamer |
false |
|
Stream CSV data chunks from Big Query using GCS |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3DataStreamer |
false |
|
Stream CSV data chunks from Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeDataStreamer |
false |
|
Stream CSV data chunks from Snowflake |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseDataStreamer |
false |
|
Stream CSV data chunks from Azure Synapse |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DSSDataStreamer |
false |
|
Stream CSV data chunks from DSS dataset |
xor
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HTTPDataStreamer |
false |
|
Stream CSV data chunks from HTTP |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JDBCDataStreamer |
false |
|
Stream CSV data chunks from JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileDataStreamer |
false |
|
Stream CSV data chunks from local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereDataStreamer |
false |
|
Stream CSV data chunks from Datasphere using browser-datasphere |
continued
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
true |
maximum: 100 minimum: 0
|
Number of explanations requested. Will be ordered by strength. |
modelId |
string |
false |
|
ID of leaderboard model which is used in job for processing predictions dataset |
modelPackageId |
string |
false |
|
ID of model package from registry is used in job for processing predictions dataset |
monitoringBatchPrefix |
string,null |
false |
|
Name of the batch to create with this job |
numConcurrent |
integer |
false |
minimum: 1
|
Number of simultaneous requests to run against the prediction instance |
outputSettings |
any |
false |
|
The response option configured for this job |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
AzureOutputAdaptor |
false |
|
Save CSV data chunks to Azure Blob Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
GCPOutputAdaptor |
false |
|
Save CSV data chunks to Google Storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BigQueryOutputAdaptor |
false |
|
Save CSV data chunks to Google BigQuery in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3OutputAdaptor |
false |
|
Saves CSV data chunks to Amazon Cloud Storage S3 |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SnowflakeOutputAdaptor |
false |
|
Save CSV data chunks to Snowflake in bulk |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
SynapseOutputAdaptor |
false |
|
Save CSV data chunks to Azure Synapse in bulk |
xor
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
HttpOutputAdaptor |
false |
|
Save CSV data chunks to HTTP data endpoint |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
JdbcOutputAdaptor |
false |
|
Save CSV data chunks via JDBC |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
LocalFileOutputAdaptor |
false |
|
Save CSV data chunks to local file storage |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
DatasphereOutputAdatpor |
false |
|
Saves CSV data chunks to Datasphere using browser-datasphere |
continued
Name |
Type |
Required |
Restrictions |
Description |
passthroughColumns |
[string] |
false |
maxItems: 100
|
Pass through columns from the original dataset |
passthroughColumnsSet |
string |
false |
|
Pass through all columns from the original dataset |
pinnedModelId |
string |
false |
|
Specify a model ID used for scoring |
predictionInstance |
BatchPredictionJobPredictionInstance |
false |
|
Override the default prediction instance from the deployment when scoring this job. |
predictionThreshold |
number |
false |
maximum: 1 minimum: 0
|
Threshold is the point that sets the class boundary for a predicted value. The model classifies an observation below the threshold as FALSE, and an observation above the threshold as TRUE. In other words, DataRobot automatically assigns the positive class label to any prediction exceeding the threshold. This value can be set between 0.0 and 1.0. |
predictionWarningEnabled |
boolean,null |
false |
|
Enable prediction warnings. |
redactedFields |
[string] |
true |
|
A list of qualified field names from intake- and/or outputSettings that were redacted due to permissions and sharing settings. For example: intakeSettings.dataStoreId |
secondaryDatasetsConfigId |
string |
false |
|
Configuration id for secondary datasets to use when making a prediction. |
skipDriftTracking |
boolean |
true |
|
Skip drift tracking for this job. |
thresholdHigh |
number |
false |
|
Compute explanations for predictions above this threshold |
thresholdLow |
number |
false |
|
Compute explanations for predictions below this threshold |
timeseriesSettings |
any |
false |
|
Time Series settings, included if this job is a Time Series job. |
oneOf
xor
xor
Enumerated Values
Property |
Value |
batchJobType |
[monitoring , prediction ] |
anonymous |
[auto , fixed , dynamic ] |
explanationAlgorithm |
[shap , xemp ] |
passthroughColumnsSet |
all |
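Putting the schema together, the sketch below assembles a representative job configuration using the fields and enum values documented above (S3 intake and output, CSV settings, pass-through columns). All IDs, URLs, and the pass-through column name are placeholders, and the endpoint used to submit such a specification is not covered by this schema listing.

```python
import json

# Representative job configuration following BatchPredictionJobSpecResponse.
# Field names, defaults, and enum values come from the schema above; all IDs
# and URLs are placeholders.
job_spec = {
    "deploymentId": "<deployment-id>",
    "abortOnError": True,
    "disableRowLevelErrorHandling": False,
    "chunkSize": "auto",  # or a fixed size in bytes between 20 and 41943040
    "csvSettings": {"delimiter": ",", "encoding": "utf-8", "quotechar": '"'},
    "intakeSettings": {
        "type": "s3",
        "url": "s3://example-bucket/scoring/input.csv",
        "credentialId": "<credential-id>",
        "format": "csv",
    },
    "outputSettings": {
        "type": "s3",
        "url": "s3://example-bucket/scoring/output.csv",
        "credentialId": "<credential-id>",
        "format": "csv",
    },
    "passthroughColumns": ["row_id"],
    "includeProbabilities": True,
    "maxExplanations": 3,
    "skipDriftTracking": False,
}

print(json.dumps(job_spec, indent=2))
```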
BatchPredictionJobTimeSeriesSettingsForecast
{
"properties": {
"forecastPoint": {
"description": "Used for forecast predictions in order to override the inferred forecast point from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
forecastPoint |
string(date-time) |
false |
|
Used for forecast predictions in order to override the inferred forecast point from the dataset. |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed. |
type |
string |
true |
|
Forecast mode makes predictions using forecastPoint or rows in the dataset without target. |
Enumerated Values
Property |
Value |
type |
forecast |
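For reference, a forecast-mode timeseriesSettings payload shaped by this schema might look like the sketch below; the forecast point timestamp is a placeholder.

```python
# Forecast-mode settings matching BatchPredictionJobTimeSeriesSettingsForecast.
# The forecast point is optional and, when omitted, is inferred from the dataset.
timeseries_settings = {
    "type": "forecast",
    "forecastPoint": "2024-01-15T00:00:00Z",  # placeholder timestamp
    "relaxKnownInAdvanceFeaturesCheck": False,
}
```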
BatchPredictionJobTimeSeriesSettingsForecastWithPolicy
{
"properties": {
"forecastPointPolicy": {
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode makes predictions using forecastPoint or rows in the dataset without target.",
"enum": [
"forecast"
],
"type": "string"
}
},
"required": [
"forecastPointPolicy",
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
forecastPointPolicy |
JobRunTimeBasedForecastPointPolicy |
true |
|
Forecast point policy |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed. |
type |
string |
true |
|
Forecast mode makes predictions using forecastPoint or rows in the dataset without target. |
Enumerated Values
Property |
Value |
type |
forecast |
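A hedged sketch of the same settings using a job-run-time-based forecast point policy; the ISO-8601 offset shown is illustrative only.

```python
# Forecast settings with a job-run-time-based forecast point policy, matching
# BatchPredictionJobTimeSeriesSettingsForecastWithPolicy. The offset shifts the
# scheduled run time one day back to obtain the forecast point.
timeseries_settings = {
    "type": "forecast",
    "forecastPointPolicy": {
        "type": "jobRunTimeBased",
        "configuration": {"offset": "-P1D"},  # illustrative ISO-8601 duration
    },
}
```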
BatchPredictionJobTimeSeriesSettingsHistorical
{
"properties": {
"predictionsEndDate": {
"description": "Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range.",
"enum": [
"historical"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
predictionsEndDate |
string(date-time) |
false |
|
Used for historical predictions in order to override date to which predictions should be calculated. By default value will be inferred automatically from the dataset. |
predictionsStartDate |
string(date-time) |
false |
|
Used for historical predictions in order to override date from which predictions should be calculated. By default value will be inferred automatically from the dataset. |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed. |
type |
string |
true |
|
Historical mode enables bulk predictions which calculates predictions for all possible forecast points and forecast distances in the dataset within the predictionsStartDate/predictionsEndDate range. |
Enumerated Values
Property |
Value |
type |
historical |
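A historical-mode payload following this schema could look like the sketch below; both date overrides are placeholders and may be omitted.

```python
# Historical-mode settings matching BatchPredictionJobTimeSeriesSettingsHistorical.
# When omitted, both dates are inferred automatically from the dataset.
timeseries_settings = {
    "type": "historical",
    "predictionsStartDate": "2023-01-01T00:00:00Z",  # placeholder
    "predictionsEndDate": "2023-06-30T00:00:00Z",    # placeholder
}
```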
BatchPredictionJobTimeSeriesSettingsTraining
{
"properties": {
"relaxKnownInAdvanceFeaturesCheck": {
"default": false,
"description": "If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed.",
"type": "boolean"
},
"type": {
"description": "Forecast mode used for making predictions on subsets of training data.",
"enum": [
"training"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
If activated, missing values in the known in advance features are allowed in the forecast window at prediction time. If omitted or false, missing values are not allowed. |
type |
string |
true |
|
Forecast mode used for making predictions on subsets of training data. |
Enumerated Values
Property |
Value |
type |
training |
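For completeness, the training-mode payload is the smallest of the three variants:

```python
# Training-mode settings matching BatchPredictionJobTimeSeriesSettingsTraining;
# only the type is required.
timeseries_settings = {
    "type": "training",
    "relaxKnownInAdvanceFeaturesCheck": True,
}
```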
BatchPredictionJobUpdate
{
"properties": {
"aborted": {
"description": "Time when job abortion happened",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.26"
},
"completed": {
"description": "Time when job completed scoring",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.26"
},
"failedRows": {
"description": "Number of rows that have failed scoring",
"type": "integer",
"x-versionadded": "v2.26"
},
"hidden": {
"description": "Hides or unhides the job from the job list",
"type": "boolean",
"x-versionadded": "v2.24"
},
"jobIntakeSize": {
"description": "Number of bytes in the intake dataset for this job",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.26"
},
"jobOutputSize": {
"description": "Number of bytes in the output dataset for this job",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.26"
},
"logs": {
"description": "The job log.",
"items": {
"description": "A log line from the job log.",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.26"
},
"scoredRows": {
"description": "Number of rows that have been used in prediction computation",
"type": "integer",
"x-versionadded": "v2.26"
},
"skippedRows": {
"description": "Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows.",
"type": "integer",
"x-versionadded": "v2.26"
},
"started": {
"description": "Time when job scoring begin",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.26"
},
"status": {
"description": "The current job status",
"enum": [
"INITIALIZING",
"RUNNING",
"COMPLETED",
"ABORTED",
"FAILED"
],
"type": "string",
"x-versionadded": "v2.26"
}
},
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
aborted |
string,null(date-time) |
false |
|
Time when job abortion happened |
completed |
string,null(date-time) |
false |
|
Time when job completed scoring |
failedRows |
integer |
false |
|
Number of rows that have failed scoring |
hidden |
boolean |
false |
|
Hides or unhides the job from the job list |
jobIntakeSize |
integer,null |
false |
|
Number of bytes in the intake dataset for this job |
jobOutputSize |
integer,null |
false |
|
Number of bytes in the output dataset for this job |
logs |
[string] |
false |
|
The job log. |
scoredRows |
integer |
false |
|
Number of rows that have been used in prediction computation |
skippedRows |
integer |
false |
|
Number of rows that have been skipped during scoring. May contain non-zero value only in time-series predictions case if provided dataset contains more than required historical rows. |
started |
string,null(date-time) |
false |
|
Time when job scoring began |
status |
string |
false |
|
The current job status |
Enumerated Values
Property |
Value |
status |
[INITIALIZING , RUNNING , COMPLETED , ABORTED , FAILED ] |
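BatchPredictionJobUpdate describes the writable fields of an existing job, e.g. toggling hidden to remove a finished job from the job list. The minimal sketch below assumes the job's own URL is taken from its links field and that updates are sent as a PATCH with a JSON body, neither of which is stated by this schema listing.

```python
import requests

def hide_batch_job(session: requests.Session, job_self_url: str) -> None:
    """Hide a finished job from the job list using the BatchPredictionJobUpdate shape.

    job_self_url is assumed to be the job's own URL taken from its links field;
    PATCH is an assumption, as this schema listing does not name the route.
    """
    resp = session.patch(job_self_url, json={"hidden": True})
    resp.raise_for_status()
```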
BigQueryDataStreamer
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
}
Stream CSV data chunks from Big Query using GCS
Properties
Name |
Type |
Required |
Restrictions |
Description |
bucket |
string |
true |
|
The name of gcs bucket for data export |
credentialId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the GCP credentials |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataset |
string |
true |
|
The name of the specified big query dataset to read input data from |
table |
string |
true |
|
The name of the specified big query table to read input data from |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
type |
bigquery |
BigQueryIntake
{
"description": "Stream CSV data chunks from Big Query using GCS",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data export",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to read input data from",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to read input data from",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
}
Stream CSV data chunks from Big Query using GCS
Properties
Name |
Type |
Required |
Restrictions |
Description |
bucket |
string |
true |
|
The name of gcs bucket for data export |
credentialId |
string |
true |
|
The ID of the GCP credentials |
dataset |
string |
true |
|
The name of the specified big query dataset to read input data from |
table |
string |
true |
|
The name of the specified big query table to read input data from |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
bigquery |
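As an illustration, a BigQueryIntake configuration conforming to the schema above might look like the following; the dataset, table, bucket, and credential ID are placeholders.
{
  "type": "bigquery",
  "dataset": "<bigquery-dataset>",
  "table": "<bigquery-table>",
  "bucket": "<gcs-export-bucket>",
  "credentialId": "<gcp-credential-id>"
}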
BigQueryOutput
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "The ID of the GCP credentials",
"type": "string"
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks to Google BigQuery in bulk
Properties
Name |
Type |
Required |
Restrictions |
Description |
bucket |
string |
true |
|
The name of gcs bucket for data loading |
credentialId |
string |
true |
|
The ID of the GCP credentials |
dataset |
string |
true |
|
The name of the specified big query dataset to write data back |
table |
string |
true |
|
The name of the specified big query table to write data back |
type |
string |
true |
|
Type name for this output type |
Enumerated Values
Property |
Value |
type |
bigquery |
BigQueryOutputAdaptor
{
"description": "Save CSV data chunks to Google BigQuery in bulk",
"properties": {
"bucket": {
"description": "The name of gcs bucket for data loading",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the GCP credentials",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataset": {
"description": "The name of the specified big query dataset to write data back",
"type": "string"
},
"table": {
"description": "The name of the specified big query table to write data back",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"bigquery"
],
"type": "string"
}
},
"required": [
"bucket",
"credentialId",
"dataset",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks to Google BigQuery in bulk
Properties
Name |
Type |
Required |
Restrictions |
Description |
bucket |
string |
true |
|
The name of gcs bucket for data loading |
credentialId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the GCP credentials |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataset |
string |
true |
|
The name of the specified big query dataset to write data back |
table |
string |
true |
|
The name of the specified big query table to write data back |
type |
string |
true |
|
Type name for this output type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
type |
bigquery |
Catalog
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "The ID of the AI catalog dataset",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
}
Stream CSV data chunks from AI catalog dataset
Properties
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
string |
true |
|
The ID of the AI catalog dataset |
datasetVersionId |
string |
false |
|
The ID of the AI catalog dataset version |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
dataset |
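For example, a Catalog intake that scores a specific AI Catalog dataset version might be configured as follows; both IDs are placeholders, and datasetVersionId may be omitted since only datasetId and type are required.
{
  "type": "dataset",
  "datasetId": "<ai-catalog-dataset-id>",
  "datasetVersionId": "<ai-catalog-dataset-version-id>"
}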
CatalogDataStreamer
{
"description": "Stream CSV data chunks from AI catalog dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the AI catalog dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"datasetVersionId": {
"description": "The ID of the AI catalog dataset version",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataset"
],
"type": "string"
}
},
"required": [
"datasetId",
"type"
],
"type": "object"
}
Stream CSV data chunks from AI catalog dataset
Properties
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the AI catalog dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
datasetVersionId |
string |
false |
|
The ID of the AI catalog dataset version |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
type |
dataset |
CreatePredictionDatasetResponse
{
"properties": {
"datasetId": {
"description": "The ID of the newly created prediction dataset.",
"type": "string"
}
},
"required": [
"datasetId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
string |
true |
|
The ID of the newly created prediction dataset. |
CreatePredictionFromDataset
{
"properties": {
"actualValueColumn": {
"description": "For time series projects only. Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. This value is optional.",
"type": "string",
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "The dataset to compute predictions for - must have previously been uploaded.",
"type": "string"
},
"explanationAlgorithm": {
"description": "If set to `shap`, the response will include prediction explanations based on the SHAP explainer (SHapley Additive exPlanations). Defaults to null (no prediction explanations).",
"enum": [
"shap"
],
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"includeFdwCounts": {
"default": false,
"description": "For time series projects with partial history only. Indicates if feature derivation window counts `featureDerivationWindowCounts` will be part of the response.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"includePredictionIntervals": {
"description": "Specifies whether prediction intervals should be calculated for this request. Defaults to True if `predictionIntervalsSize` is specified, otherwise defaults to False.",
"type": "boolean",
"x-versionadded": "v2.16"
},
"maxExplanations": {
"description": "Specifies the maximum number of explanation values that should be returned for each row, ordered by absolute value, greatest to least. In the case of 'shap': If not set, explanations are returned for all features. If the number of features is greater than the 'maxExplanations', the sum of remaining values will also be returned as 'shapRemainingTotal'. Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns. Cannot be set if 'explanationAlgorithm' is omitted.",
"maximum": 100,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model to make predictions on.",
"type": "string"
},
"predictionIntervalsSize": {
"description": "Represents the percentile to use for the size of the prediction intervals. Defaults to 80 if `includePredictionIntervals` is True.",
"maximum": 100,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.16"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions. Accepts values from 0.0 to 1.0. If not specified, model default prediction threshold will be used.",
"maximum": 1,
"minimum": 0,
"type": "number",
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
}
},
"required": [
"datasetId",
"modelId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string |
false |
|
For time series projects only. Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. This value is optional. |
datasetId |
string |
true |
|
The dataset to compute predictions for - must have previously been uploaded. |
explanationAlgorithm |
string |
false |
|
If set to shap , the response will include prediction explanations based on the SHAP explainer (SHapley Additive exPlanations). Defaults to null (no prediction explanations). |
forecastPoint |
string(date-time) |
false |
|
For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error. |
includeFdwCounts |
boolean |
false |
|
For time series projects with partial history only. Indicates if feature derivation window counts featureDerivationWindowCounts will be part of the response. |
includePredictionIntervals |
boolean |
false |
|
Specifies whether prediction intervals should be calculated for this request. Defaults to True if predictionIntervalsSize is specified, otherwise defaults to False. |
maxExplanations |
integer |
false |
maximum: 100 minimum: 1
|
Specifies the maximum number of explanation values that should be returned for each row, ordered by absolute value, greatest to least. In the case of 'shap': If not set, explanations are returned for all features. If the number of features is greater than the 'maxExplanations', the sum of remaining values will also be returned as 'shapRemainingTotal'. Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns. Cannot be set if 'explanationAlgorithm' is omitted. |
modelId |
string |
true |
|
The model to make predictions on. |
predictionIntervalsSize |
integer |
false |
maximum: 100 minimum: 1
|
Represents the percentile to use for the size of the prediction intervals. Defaults to 80 if includePredictionIntervals is True. |
predictionThreshold |
number |
false |
maximum: 1 minimum: 0
|
Threshold used for binary classification in predictions. Accepts values from 0.0 to 1.0. If not specified, model default prediction threshold will be used. |
predictionsEndDate |
string(date-time) |
false |
|
The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsStartDate , and cannot be provided with the forecastPoint parameter. |
predictionsStartDate |
string(date-time) |
false |
|
The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsEndDate , and cannot be provided with the forecastPoint parameter. |
Enumerated Values
Property |
Value |
explanationAlgorithm |
shap |
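For example, a request body conforming to this schema that asks for SHAP explanations and prediction intervals might look like the following; the IDs are placeholders, and the time series parameters (forecastPoint, predictionsStartDate, predictionsEndDate) are omitted because they apply to time series projects only.
{
  "datasetId": "<prediction-dataset-id>",
  "modelId": "<model-id>",
  "explanationAlgorithm": "shap",
  "maxExplanations": 10,
  "includePredictionIntervals": true,
  "predictionIntervalsSize": 80
}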
CreateTrainingPrediction
{
"properties": {
"dataSubset": {
"default": "all",
"description": "Subset of data predicted on: The value \"all\" returns predictions for all rows in the dataset including data used for training, validation, holdout and any rows discarded. This is not available for large datasets or projects created with Date/Time partitioning. The value \"validationAndHoldout\" returns predictions for the rows used to calculate the validation score and the holdout score. Not available for large projects or Date/Time projects for models trained into the validation set. The value \"holdout\" returns predictions for the rows used to calculate the holdout score. Not available for projects created without a holdout or for models trained into holdout for large datasets or created with Date/Time partitioning. The value \"allBacktests\" returns predictions for the rows used to calculate the backtesting scores for Date/Time projects. The value \"validation\" returns predictions for the rows used to calculate the validation score.",
"enum": [
"all",
"validationAndHoldout",
"holdout",
"allBacktests",
"validation",
"crossValidation"
],
"type": "string",
"x-enum-versionadded": [
{
"value": "validation",
"x-versionadded": "v2.21"
}
]
},
"explanationAlgorithm": {
"description": "If set to \"shap\", the response will include prediction explanations based on the SHAP explainer (SHapley Additive exPlanations). Defaults to null (no prediction explanations)",
"type": "string",
"x-versionadded": "v2.21"
},
"maxExplanations": {
"description": "Specifies the maximum number of explanation values that should be returned for each row, ordered by absolute value, greatest to least. In the case of \"shap\": If not set, explanations are returned for all features. If the number of features is greater than the \"maxExplanations\", the sum of remaining values will also be returned as \"shapRemainingTotal\". Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns. Cannot be set if \"explanationAlgorithm\" is omitted.",
"maximum": 100,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model to make predictions on",
"type": "string"
}
},
"required": [
"dataSubset",
"modelId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
dataSubset |
string |
true |
|
Subset of data predicted on: The value "all" returns predictions for all rows in the dataset including data used for training, validation, holdout and any rows discarded. This is not available for large datasets or projects created with Date/Time partitioning. The value "validationAndHoldout" returns predictions for the rows used to calculate the validation score and the holdout score. Not available for large projects or Date/Time projects for models trained into the validation set. The value "holdout" returns predictions for the rows used to calculate the holdout score. Not available for projects created without a holdout or for models trained into holdout for large datasets or created with Date/Time partitioning. The value "allBacktests" returns predictions for the rows used to calculate the backtesting scores for Date/Time projects. The value "validation" returns predictions for the rows used to calculate the validation score. |
explanationAlgorithm |
string |
false |
|
If set to "shap", the response will include prediction explanations based on the SHAP explainer (SHapley Additive exPlanations). Defaults to null (no prediction explanations) |
maxExplanations |
integer |
false |
maximum: 100 minimum: 1
|
Specifies the maximum number of explanation values that should be returned for each row, ordered by absolute value, greatest to least. In the case of "shap": If not set, explanations are returned for all features. If the number of features is greater than the "maxExplanations", the sum of remaining values will also be returned as "shapRemainingTotal". Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns. Cannot be set if "explanationAlgorithm" is omitted. |
modelId |
string |
true |
|
The model to make predictions on |
Enumerated Values
Property |
Value |
dataSubset |
[all , validationAndHoldout , holdout , allBacktests , validation , crossValidation ] |
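As a sketch, a training-prediction request over the holdout partition with SHAP explanations could look like this; the model ID is a placeholder.
{
  "modelId": "<model-id>",
  "dataSubset": "holdout",
  "explanationAlgorithm": "shap",
  "maxExplanations": 5
}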
CredentialId
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalogVersionId |
string |
false |
|
The ID of the latest version of the catalog entry. |
credentialId |
string |
true |
|
The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional. |
url |
string |
false |
|
The link to retrieve more detailed information about the entity that uses this catalog dataset. |
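For illustration, a minimal CredentialId object only needs the credential reference; both values below are placeholders and catalogVersionId is optional.
{
  "credentialId": "<stored-credential-id>",
  "catalogVersionId": "<catalog-version-id>"
}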
DSS
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "The ID of the dataset",
"type": "string"
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
}
Stream CSV data chunks from DSS dataset
Properties
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
string |
false |
|
The ID of the dataset |
partition |
string,null |
false |
|
Partition used to predict |
projectId |
string |
true |
|
The ID of the project |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
partition |
[holdout , validation , allBacktests , null ] |
type |
dss |
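For example, a DSS intake that scores the holdout partition of a project's dataset might be configured as follows; the IDs are placeholders, and only projectId and type are required.
{
  "type": "dss",
  "projectId": "<project-id>",
  "datasetId": "<dataset-id>",
  "partition": "holdout"
}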
DSSDataStreamer
{
"description": "Stream CSV data chunks from DSS dataset",
"properties": {
"datasetId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the dataset",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"partition": {
"default": null,
"description": "Partition used to predict",
"enum": [
"holdout",
"validation",
"allBacktests",
null
],
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The ID of the project",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dss"
],
"type": "string"
}
},
"required": [
"projectId",
"type"
],
"type": "object"
}
Stream CSV data chunks from DSS dataset
Properties
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the dataset |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
partition |
string,null |
false |
|
Partition used to predict |
projectId |
string |
true |
|
The ID of the project |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
partition |
[holdout , validation , allBacktests , null ] |
type |
dss |
DataQualityWarningsRecord
{
"description": "A Json object of available warnings about potential problems in this prediction dataset. Empty if no warnings.",
"properties": {
"hasKiaMissingValuesInForecastWindow": {
"description": "If true, known-in-advance features in this dataset have missing values in the forecast window. Absence of the known-in-advance values can negatively impact prediction quality. Only applies for time series projects.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"insufficientRowsForEvaluatingModels": {
"description": "If true, the dataset has a target column present indicating it can be used to evaluate model performance but too few rows to be trustworthy in so doing. If false, either it has no target column at all or it has sufficient rows for model evaluation. Only applies for regression, binary classification, multiclass classification projects and time series unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"singleClassActualValueColumn": {
"description": "If true, actual value column has only one class and such insights as ROC curve can not be calculated. Only applies for binary classification projects or unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.21"
}
},
"type": "object"
}
A Json object of available warnings about potential problems in this prediction dataset. Empty if no warnings.
Properties
Name |
Type |
Required |
Restrictions |
Description |
hasKiaMissingValuesInForecastWindow |
boolean |
false |
|
If true, known-in-advance features in this dataset have missing values in the forecast window. Absence of the known-in-advance values can negatively impact prediction quality. Only applies for time series projects. |
insufficientRowsForEvaluatingModels |
boolean |
false |
|
If true, the dataset has a target column present indicating it can be used to evaluate model performance but too few rows to be trustworthy in so doing. If false, either it has no target column at all or it has sufficient rows for model evaluation. Only applies for regression, binary classification, multiclass classification projects and time series unsupervised projects. |
singleClassActualValueColumn |
boolean |
false |
|
If true, actual value column has only one class and such insights as ROC curve can not be calculated. Only applies for binary classification projects or unsupervised projects. |
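As an illustration, a warnings record for a dataset with too few rows for model evaluation might look like this; the object is empty when no warnings apply.
{
  "hasKiaMissingValuesInForecastWindow": false,
  "insufficientRowsForEvaluatingModels": true,
  "singleClassActualValueColumn": false
}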
DataStageDataStreamer
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
}
Stream CSV data chunks from data stage storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
dataStageId |
string |
true |
|
The ID of the data stage |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
dataStage |
DataStageIntake
{
"description": "Stream CSV data chunks from data stage storage",
"properties": {
"dataStageId": {
"description": "The ID of the data stage",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"dataStage"
],
"type": "string"
}
},
"required": [
"dataStageId",
"type"
],
"type": "object"
}
Stream CSV data chunks from data stage storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
dataStageId |
string |
true |
|
The ID of the data stage |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
dataStage |
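A minimal DataStageIntake configuration, with a placeholder ID, would look like the following.
{
  "type": "dataStage",
  "dataStageId": "<data-stage-id>"
}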
DatabricksAccessTokenCredentials
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'databricks_access_token_account' here.",
"enum": [
"databricks_access_token_account"
],
"type": "string"
},
"databricksAccessToken": {
"description": "Databricks personal access token.",
"minLength": 1,
"type": "string"
}
},
"required": [
"credentialType",
"databricksAccessToken"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialType |
string |
true |
|
The type of these credentials, 'databricks_access_token_account' here. |
databricksAccessToken |
string |
true |
minLength: 1
|
Databricks personal access token. |
Enumerated Values
Property |
Value |
credentialType |
databricks_access_token_account |
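For example, a Databricks credential payload conforming to this schema looks like the following; the token value is a placeholder.
{
  "credentialType": "databricks_access_token_account",
  "databricksAccessToken": "<databricks-personal-access-token>"
}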
DatasphereDataStreamer
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
Stream CSV data chunks from Datasphere using browser-datasphere
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
credentialId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the credential holding information about a user with read access to the data source. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataStoreId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
ID of the data store to connect to |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
schema |
string |
true |
|
The name of the specified database schema to read input data from. |
table |
string |
true |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
anonymous |
[redacted] |
type |
datasphere |
DatasphereIntake
{
"description": "Stream CSV data chunks from Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the data source.",
"type": "string"
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
Stream CSV data chunks from Datasphere using browser-datasphere
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
credentialId |
string |
true |
|
The ID of the credential holding information about a user with read access to the data source. |
dataStoreId |
string |
true |
|
ID of the data store to connect to |
schema |
string |
true |
|
The name of the specified database schema to read input data from. |
table |
string |
true |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
datasphere |
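As a sketch, a DatasphereIntake configuration might look like the following; all names and IDs are placeholders, and catalog is the only optional field.
{
  "type": "datasphere",
  "dataStoreId": "<data-store-id>",
  "credentialId": "<credential-id>",
  "catalog": "<database-catalog>",
  "schema": "<database-schema>",
  "table": "<database-table>"
}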
DatasphereOutput
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
"dataStoreId": {
"description": "The ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
Saves CSV data chunks to Datasphere using browser-datasphere
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
credentialId |
string |
true |
|
The ID of the credential holding information about a user with write access to the data destination. |
dataStoreId |
string |
true |
|
The ID of the data store to connect to |
schema |
string |
true |
|
The name of the specified database schema to read input data from. |
table |
string |
true |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
The type name for this output type |
Enumerated Values
Property |
Value |
type |
datasphere |
DatasphereOutputAdatpor
{
"description": "Saves CSV data chunks to Datasphere using browser-datasphere",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the data destination.",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "The type name for this output type",
"enum": [
"datasphere"
],
"type": "string"
}
},
"required": [
"credentialId",
"dataStoreId",
"schema",
"table",
"type"
],
"type": "object",
"x-versionadded": "v2.35"
}
Saves CSV data chunks to Datasphere using browser-datasphere
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
credentialId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the credential holding information about a user with write access to the data destination. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataStoreId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
The ID of the data store to connect to |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
schema |
string |
true |
|
The name of the specified database schema to read input data from. |
table |
string |
true |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
The type name for this output type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
anonymous |
[redacted] |
type |
datasphere |
FileSystemDataStreamer
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
path |
string |
true |
|
Path to data on host filesystem |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
filesystem |
FileSystemIntake
{
"properties": {
"path": {
"description": "Path to data on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
path |
string |
true |
|
Path to data on host filesystem |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
filesystem |
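A FileSystemIntake configuration only needs a path and the type name; the path below is a placeholder.
{
  "type": "filesystem",
  "path": "/path/to/intake-data.csv"
}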
FileSystemOutput
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
path |
string |
true |
|
Path to results on host filesystem |
type |
string |
true |
|
Type name for this output type |
Enumerated Values
Property |
Value |
type |
filesystem |
FileSystemOutputAdaptor
{
"properties": {
"path": {
"description": "Path to results on host filesystem",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"filesystem"
],
"type": "string"
}
},
"required": [
"path",
"type"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
path |
string |
true |
|
Path to results on host filesystem |
type |
string |
true |
|
Type name for this output type |
Enumerated Values
Property |
Value |
type |
filesystem |
GCPDataStreamer
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from Google Storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
Use the specified credential to access the url |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
format |
string |
false |
|
Type of input file format |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
format |
[csv , parquet ] |
type |
gcp |
GCPIntake
{
"description": "Stream CSV data chunks from Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from Google Storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string,null |
false |
|
Use the specified credential to access the url |
format |
string |
false |
|
Type of input file format |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
format |
[csv , parquet ] |
type |
gcp |
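For example, a GCPIntake configuration reading a CSV file from Google Storage might look like this; the URL and credential ID are placeholders, and the exact URL form is whatever your Google Storage setup expects.
{
  "type": "gcp",
  "url": "<google-storage-url-to-csv>",
  "format": "csv",
  "credentialId": "<gcp-credential-id>"
}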
GCPKey
{
"description": "The Google Cloud Platform (GCP) key. Output is the downloaded JSON resulting from creating a service account *User Managed Key* (in the *IAM & admin > Service accounts section* of GCP).Required if googleConfigId/configId is not specified.Cannot include this parameter if googleConfigId/configId is specified.",
"properties": {
"authProviderX509CertUrl": {
"description": "Auth provider X509 certificate URL.",
"format": "uri",
"type": "string"
},
"authUri": {
"description": "Auth URI.",
"format": "uri",
"type": "string"
},
"clientEmail": {
"description": "Client email address.",
"type": "string"
},
"clientId": {
"description": "Client ID.",
"type": "string"
},
"clientX509CertUrl": {
"description": "Client X509 certificate URL.",
"format": "uri",
"type": "string"
},
"privateKey": {
"description": "Private key.",
"type": "string"
},
"privateKeyId": {
"description": "Private key ID",
"type": "string"
},
"projectId": {
"description": "Project ID.",
"type": "string"
},
"tokenUri": {
"description": "Token URI.",
"format": "uri",
"type": "string"
},
"type": {
"description": "GCP account type.",
"enum": [
"service_account"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
The Google Cloud Platform (GCP) key. Output is the downloaded JSON resulting from creating a service account User Managed Key (in the IAM & admin > Service accounts section of GCP). Required if googleConfigId/configId is not specified. Cannot include this parameter if googleConfigId/configId is specified.
Properties
Name |
Type |
Required |
Restrictions |
Description |
authProviderX509CertUrl |
string(uri) |
false |
|
Auth provider X509 certificate URL. |
authUri |
string(uri) |
false |
|
Auth URI. |
clientEmail |
string |
false |
|
Client email address. |
clientId |
string |
false |
|
Client ID. |
clientX509CertUrl |
string(uri) |
false |
|
Client X509 certificate URL. |
privateKey |
string |
false |
|
Private key. |
privateKeyId |
string |
false |
|
Private key ID |
projectId |
string |
false |
|
Project ID. |
tokenUri |
string(uri) |
false |
|
Token URI. |
type |
string |
true |
|
GCP account type. |
Enumerated Values
Property |
Value |
type |
service_account |
GCPOutput
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Save CSV data chunks to Google Storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string,null |
false |
|
Use the specified credential to access the url |
format |
string |
false |
|
Type of input file format |
partitionColumns |
[string] |
false |
maxItems: 100
|
For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash ("/"). |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
format |
[csv , parquet ] |
type |
gcp |
GCPOutputAdaptor
{
"description": "Save CSV data chunks to Google Storage",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"type": {
"description": "Type name for this output type",
"enum": [
"gcp"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Save CSV data chunks to Google Storage
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
Use the specified credential to access the url |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
format |
string |
false |
|
Type of input file format |
partitionColumns |
[string] |
false |
maxItems: 100
|
For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash ("/"). |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
format |
[csv , parquet ] |
type |
gcp |
GoogleServiceAccountCredentials
{
"properties": {
"configId": {
"description": "ID of Secure configurations shared by admin.Alternative to googleConfigId (deprecated). If specified, cannot include gcpKey.",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 'gcp' here.",
"enum": [
"gcp"
],
"type": "string"
},
"gcpKey": {
"description": "The Google Cloud Platform (GCP) key. Output is the downloaded JSON resulting from creating a service account *User Managed Key* (in the *IAM & admin > Service accounts section* of GCP).Required if googleConfigId/configId is not specified.Cannot include this parameter if googleConfigId/configId is specified.",
"properties": {
"authProviderX509CertUrl": {
"description": "Auth provider X509 certificate URL.",
"format": "uri",
"type": "string"
},
"authUri": {
"description": "Auth URI.",
"format": "uri",
"type": "string"
},
"clientEmail": {
"description": "Client email address.",
"type": "string"
},
"clientId": {
"description": "Client ID.",
"type": "string"
},
"clientX509CertUrl": {
"description": "Client X509 certificate URL.",
"format": "uri",
"type": "string"
},
"privateKey": {
"description": "Private key.",
"type": "string"
},
"privateKeyId": {
"description": "Private key ID",
"type": "string"
},
"projectId": {
"description": "Project ID.",
"type": "string"
},
"tokenUri": {
"description": "Token URI.",
"format": "uri",
"type": "string"
},
"type": {
"description": "GCP account type.",
"enum": [
"service_account"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"googleConfigId": {
"description": "ID of Secure configurations shared by admin. This is deprecated.Please use configId instead. If specified, cannot include gcpKey.",
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
configId |
string |
false |
|
ID of Secure configurations shared by admin. Alternative to googleConfigId (deprecated). If specified, cannot include gcpKey. |
credentialType |
string |
true |
|
The type of these credentials, 'gcp' here. |
gcpKey |
GCPKey |
false |
|
The Google Cloud Platform (GCP) key. Output is the downloaded JSON resulting from creating a service account User Managed Key (in the IAM & admin > Service accounts section of GCP). Required if googleConfigId/configId is not specified. Cannot include this parameter if googleConfigId/configId is specified. |
googleConfigId |
string |
false |
|
ID of Secure configurations shared by admin. This is deprecated. Please use configId instead. If specified, cannot include gcpKey. |
Enumerated Values
Property |
Value |
credentialType |
gcp |
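Put together, a GCP service-account credential payload that supplies an inline key rather than a configId might look like the following; every value is a placeholder, and the key fields follow the GCPKey schema above.
{
  "credentialType": "gcp",
  "gcpKey": {
    "type": "service_account",
    "projectId": "<gcp-project-id>",
    "privateKeyId": "<private-key-id>",
    "privateKey": "<private-key-pem>",
    "clientEmail": "<service-account-email>",
    "clientId": "<client-id>",
    "authUri": "<auth-uri>",
    "tokenUri": "<token-uri>",
    "authProviderX509CertUrl": "<auth-provider-cert-url>",
    "clientX509CertUrl": "<client-cert-url>"
  }
}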
HTTPDataStreamer
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from HTTP
Properties
Name |
Type |
Required |
Restrictions |
Description |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
type |
http |
HTTPIntake
{
"description": "Stream CSV data chunks from HTTP",
"properties": {
"type": {
"description": "Type name for this intake type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from HTTP
Properties
Name |
Type |
Required |
Restrictions |
Description |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
type |
http |
HTTPOutput
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
}
Save CSV data chunks to HTTP data endpoint
Properties
Name |
Type |
Required |
Restrictions |
Description |
headers |
object |
false |
|
Extra headers to send with the request |
method |
string |
true |
|
Method to use when saving the CSV file |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
method |
[POST , PUT ] |
type |
http |
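As a sketch, an HTTPOutput destination that uploads results with a PUT request might be configured like this; the URL and header value are placeholders.
{
  "type": "http",
  "url": "<https-url-for-results>",
  "method": "PUT",
  "headers": {
    "Authorization": "Bearer <token>"
  }
}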
HttpOutputAdaptor
{
"description": "Save CSV data chunks to HTTP data endpoint",
"properties": {
"headers": {
"description": "Extra headers to send with the request",
"type": "object"
},
"method": {
"description": "Method to use when saving the CSV file",
"enum": [
"POST",
"PUT"
],
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"http"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"method",
"type",
"url"
],
"type": "object"
}
Save CSV data chunks to HTTP data endpoint
Properties
Name |
Type |
Required |
Restrictions |
Description |
headers |
object |
false |
|
Extra headers to send with the request |
method |
string |
true |
|
Method to use when saving the CSV file |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
method |
[POST , PUT ] |
type |
http |
JDBCDataStreamer
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
}
Stream CSV data chunks from JDBC
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the JDBC data source. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataStoreId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
ID of the data store to connect to |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
fetchSize |
integer |
false |
maximum: 1000000 minimum: 1
|
A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21. |
query |
string |
false |
|
A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of "table" and/or "schema" parameters exclusively. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
schema |
string |
false |
|
The name of the specified database schema to read input data from. |
table |
string |
false |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
anonymous |
[redacted] |
type |
jdbc |
JDBCIntake
{
"description": "Stream CSV data chunks from JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.22"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"fetchSize": {
"description": "A user specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21.",
"maximum": 1000000,
"minimum": 1,
"type": "integer"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"type"
],
"type": "object"
}
Stream CSV data chunks from JDBC

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| catalog | string | false | | The name of the specified database catalog to read input data from. |
| credentialId | string,null | false | | The ID of the credential holding information about a user with read access to the JDBC data source. |
| dataStoreId | string | true | | ID of the data store to connect to |
| fetchSize | integer | false | maximum: 1000000, minimum: 1 | A user-specified fetch size. Changing it can be used to balance throughput and memory usage. Deprecated and ignored since v2.21. |
| query | string | false | | A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of "table" and/or "schema" parameters exclusively. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
| schema | string | false | | The name of the specified database schema to read input data from. |
| table | string | false | | The name of the specified database table to read input data from. |
| type | string | true | | Type name for this intake type |
Enumerated Values

| Property | Value |
|---|---|
| type | jdbc |
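For orientation, here is a minimal Python sketch of a `jdbc` intake configuration used as the `intakeSettings` of a batch prediction job. It assumes an API token in a `DATAROBOT_API_TOKEN` environment variable and that jobs are created by POSTing to `/api/v2/batchPredictions/` (an endpoint not documented in this section); the deployment, data store, and credential IDs are placeholders.

```python
import os

import requests

API_ROOT = "https://app.datarobot.com/api/v2"  # assumed installation URL
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}

# Intake settings matching the JDBCIntake schema: dataStoreId and type are
# required; here a custom SELECT is used instead of schema/table.
intake_settings = {
    "type": "jdbc",
    "dataStoreId": "5e4bc5b35e6e763beb488dba",   # placeholder data store ID
    "credentialId": "5e4bc5555e6e763beb9db147",  # placeholder credential ID
    "query": "SELECT * FROM public.scoring_input",
}

payload = {
    "deploymentId": "5e4bc5b84e73e6713ca8343c",  # placeholder deployment ID
    "intakeSettings": intake_settings,
    "outputSettings": {"type": "localFile"},     # download results when the job finishes
}

response = requests.post(f"{API_ROOT}/batchPredictions/", json=payload, headers=HEADERS)
response.raise_for_status()
print(response.json())  # the created batch prediction job
```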
JDBCOutput
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks via JDBC

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| catalog | string | false | | The name of the specified database catalog to write output data to. |
| commitInterval | integer | false | maximum: 86400, minimum: 0 | Defines the time interval in seconds between commits to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing. |
| createTableIfNotExists | boolean | false | | Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the statementType parameter. |
| credentialId | string,null | false | | The ID of the credential holding information about a user with write access to the JDBC data source. |
| dataStoreId | string | true | | ID of the data store to connect to |
| schema | string | false | | The name of the specified database schema to write the results to. |
| statementType | string | true | | The statement type to use when writing the results. Deprecation warning: use of create_table is now discouraged. Use one of the other possibilities along with the parameter createTableIfNotExists set to true. |
| table | string | true | | The name of the specified database table to write the results to. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
| type | string | true | | Type name for this output type |
| updateColumns | [string] | false | maxItems: 100 | The column names to be updated if statementType is set to either update or upsert. |
| whereColumns | [string] | false | maxItems: 100 | The column names to be used in the where clause if statementType is set to update or upsert. |

Enumerated Values

| Property | Value |
|---|---|
| statementType | [createTable, create_table, insert, insertUpdate, insert_update, update] |
| type | jdbc |
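As a counterpart, a hypothetical `outputSettings` block matching the JDBCOutput schema might look like the sketch below. Since `create_table` is deprecated, it uses `insertUpdate` together with `createTableIfNotExists`; the IDs, schema, table, and column names are placeholders.

```python
# Output settings matching the JDBCOutput schema: dataStoreId, statementType,
# table, and type are required.
output_settings = {
    "type": "jdbc",
    "dataStoreId": "5e4bc5b35e6e763beb488dba",     # placeholder data store ID
    "credentialId": "5e4bc5555e6e763beb9db147",    # placeholder credential ID
    "schema": "analytics",
    "table": "scored_rows",
    "statementType": "insertUpdate",               # upsert instead of the deprecated create_table
    "createTableIfNotExists": True,
    "updateColumns": ["prediction", "scored_at"],  # columns rewritten on update
    "whereColumns": ["row_id"],                    # columns matched in the WHERE clause
    "commitInterval": 0,                           # 0 = write the entire job before committing
}
```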
JdbcOutputAdaptor
{
"description": "Save CSV data chunks via JDBC",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.22"
},
"commitInterval": {
"default": 600,
"description": "Defines a time interval in seconds between each commit is done to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing.",
"maximum": 86400,
"minimum": 0,
"type": "integer",
"x-versionadded": "v2.21"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.24"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"schema": {
"description": "The name of the specified database schema to write the results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results. Deprecation Warning: Use of `create_table` is now discouraged. Use one of the other possibilities along with the parameter `createTableIfNotExists` set to `true`.",
"enum": [
"createTable",
"create_table",
"insert",
"insertUpdate",
"insert_update",
"update"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write the results to.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"jdbc"
],
"type": "string"
},
"updateColumns": {
"description": "The column names to be updated if statementType is set to either update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
},
"whereColumns": {
"description": "The column names to be used in the where clause if statementType is set to update or upsert.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"dataStoreId",
"statementType",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks via JDBC

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| catalog | string | false | | The name of the specified database catalog to write output data to. |
| commitInterval | integer | false | maximum: 86400, minimum: 0 | Defines the time interval in seconds between commits to the JDBC source. If set to 0, the batch prediction operation will write the entire job before committing. |
| createTableIfNotExists | boolean | false | | Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the statementType parameter. |
| credentialId | any | false | | Either the populated value of the field or [redacted] due to permission settings |

oneOf

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | string,null | false | | The ID of the credential holding information about a user with write access to the JDBC data source. |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | string | false | | none |

continued

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| dataStoreId | any | true | | Either the populated value of the field or [redacted] due to permission settings |

oneOf

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | string | false | | ID of the data store to connect to |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | string | false | | none |

continued

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| schema | string | false | | The name of the specified database schema to write the results to. |
| statementType | string | true | | The statement type to use when writing the results. Deprecation warning: use of create_table is now discouraged. Use one of the other possibilities along with the parameter createTableIfNotExists set to true. |
| table | string | true | | The name of the specified database table to write the results to. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
| type | string | true | | Type name for this output type |
| updateColumns | [string] | false | maxItems: 100 | The column names to be updated if statementType is set to either update or upsert. |
| whereColumns | [string] | false | maxItems: 100 | The column names to be used in the where clause if statementType is set to update or upsert. |

Enumerated Values

| Property | Value |
|---|---|
| anonymous | [redacted] |
| anonymous | [redacted] |
| statementType | [createTable, create_table, insert, insertUpdate, insert_update, update] |
| type | jdbc |
JobRunTimeBasedForecastPointPolicy
{
"description": "Forecast point policy",
"properties": {
"configuration": {
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
},
"type": {
"description": "Type of the forecast point policy. Forecast point will be based on the scheduled run time of the job or the current moment in UTC if job was launched manually. Run time can be adjusted backwards or forwards.",
"enum": [
"jobRunTimeBased"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Forecast point policy

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| configuration | JobRunTimeBasedForecastPointPolicySettings | false | | Customize whether the forecast point based on the job run time needs to be shifted. |
| type | string | true | | Type of the forecast point policy. The forecast point will be based on the scheduled run time of the job, or the current moment in UTC if the job was launched manually. The run time can be adjusted backwards or forwards. |

Enumerated Values

| Property | Value |
|---|---|
| type | jobRunTimeBased |
JobRunTimeBasedForecastPointPolicySettings
{
"description": "Customize if forecast point based on job run time needs to be shifted.",
"properties": {
"offset": {
"description": "Offset to apply to scheduled run time of the job in a ISO-8601 format toobtain a relative forecast point. Example of the positive offset 'P2DT5H3M', example of the negative offset '-P2DT5H4M'",
"format": "offset",
"type": "string"
}
},
"required": [
"offset"
],
"type": "object"
}
Customize whether the forecast point based on the job run time needs to be shifted.

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| offset | string(offset) | true | | Offset to apply to the scheduled run time of the job, in ISO-8601 format, to obtain a relative forecast point. Example of a positive offset: 'P2DT5H3M'; example of a negative offset: '-P2DT5H4M' |
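A small sketch of a job-run-time-based forecast point policy, shifting the run time two days and five hours into the past. Where exactly this object is embedded in a job definition is not shown in this section, so only the policy object itself is illustrated.

```python
# Policy matching JobRunTimeBasedForecastPointPolicy: the forecast point is the
# scheduled run time (or "now" for manual runs) shifted by the ISO-8601 offset.
forecast_point_policy = {
    "type": "jobRunTimeBased",
    "configuration": {
        "offset": "-P2DT5H0M",  # negative offset moves the forecast point into the past
    },
}
```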
LocalFileDataStreamer
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Stream CSV data chunks from local file storage
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| async | boolean,null | false | | The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started. Setting it to false will postpone submitting the job to the queue until all data has been uploaded. This is helpful if the user is on a bad connection and bottlenecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished. |
| multipart | boolean | false | | Specify whether the data will be uploaded in multiple parts instead of a single file |
| type | string | true | | Type name for this intake type |

Enumerated Values

| Property | Value |
|---|---|
| type | [local_file, localFile] |
LocalFileIntake
{
"description": "Stream CSV data chunks from local file storage",
"properties": {
"async": {
"description": "The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started.Setting it to false will postpone submitting the job to the queue until all data has been uploaded.This is helpful if the user is on a bad connection and bottlednecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.28"
},
"multipart": {
"description": "specify if the data will be uploaded in multiple parts instead of a single file",
"type": "boolean",
"x-versionadded": "v2.27"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Stream CSV data chunks from local file storage
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| async | boolean,null | false | | The default behavior (async: true) will still submit the job to the queue and start processing as soon as the upload is started. Setting it to false will postpone submitting the job to the queue until all data has been uploaded. This is helpful if the user is on a bad connection and bottlenecked by the upload speed. Instead of blocking the queue this will allow others to submit to the queue until the upload has finished. |
| multipart | boolean | false | | Specify whether the data will be uploaded in multiple parts instead of a single file |
| type | string | true | | Type name for this intake type |

Enumerated Values

| Property | Value |
|---|---|
| type | [local_file, localFile] |
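For completeness, a local-file intake block is trivial; the sketch below (a plain dictionary with illustrative values) defers queueing until the upload finishes and enables multipart uploads.

```python
# Intake settings matching LocalFileIntake: only `type` is required.
intake_settings = {
    "type": "localFile",
    "multipart": True,  # upload the data in multiple parts instead of a single file
    "async": False,     # wait until the upload finishes before queueing the job
}
```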
LocalFileOutput
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Save CSV data chunks to local file storage
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| type | string | true | | Type name for this output type |

Enumerated Values

| Property | Value |
|---|---|
| type | [local_file, localFile] |
LocalFileOutputAdaptor
{
"description": "Save CSV data chunks to local file storage",
"properties": {
"type": {
"description": "Type name for this output type",
"enum": [
"local_file",
"localFile"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
}
Save CSV data chunks to local file storage
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| type | string | true | | Type name for this output type |

Enumerated Values

| Property | Value |
|---|---|
| type | [local_file, localFile] |
MonitoringAggregation
{
"description": "Defines the aggregation policy for monitoring jobs.",
"properties": {
"retentionPolicy": {
"default": "percentage",
"description": "Monitoring jobs retention policy for aggregation.",
"enum": [
"samples",
"percentage"
],
"type": "string"
},
"retentionValue": {
"default": 0,
"description": "Amount/percentage of samples to retain.",
"type": "integer"
}
},
"type": "object"
}
Defines the aggregation policy for monitoring jobs.
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| retentionPolicy | string | false | | Monitoring jobs retention policy for aggregation. |
| retentionValue | integer | false | | Amount/percentage of samples to retain. |

Enumerated Values

| Property | Value |
|---|---|
| retentionPolicy | [samples, percentage] |
MonitoringColumnsMapping
{
"description": "Column names mapping for monitoring",
"properties": {
"actedUponColumn": {
"description": "Name of column that contains value for acted_on.",
"type": "string"
},
"actualsTimestampColumn": {
"description": "Name of column that contains actual timestamps.",
"type": "string"
},
"actualsValueColumn": {
"description": "Name of column that contains actuals value.",
"type": "string"
},
"associationIdColumn": {
"description": "Name of column that contains association Id.",
"type": "string"
},
"customMetricId": {
"description": "Id of custom metric to process values for.",
"type": "string"
},
"customMetricTimestampColumn": {
"description": "Name of column that contains custom metric values timestamps.",
"type": "string"
},
"customMetricTimestampFormat": {
"description": "Format of timestamps from customMetricTimestampColumn.",
"type": "string"
},
"customMetricValueColumn": {
"description": "Name of column that contains values for custom metric.",
"type": "string"
},
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"predictionsColumns": {
"description": "Name of the column(s) which contain prediction values.",
"oneOf": [
{
"description": "Map containing column name(s) and class name(s) for multiclass problem.",
"items": {
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
{
"description": "Column name that contains the prediction for regressions problem.",
"type": "string"
}
]
},
"reportDrift": {
"description": "True to report drift, False otherwise.",
"type": "boolean"
},
"reportPredictions": {
"description": "True to report prediction, False otherwise.",
"type": "boolean"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"type": "object"
}
Column names mapping for monitoring
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| actedUponColumn | string | false | | Name of the column that contains the value for acted_on. |
| actualsTimestampColumn | string | false | | Name of the column that contains actuals timestamps. |
| actualsValueColumn | string | false | | Name of the column that contains the actuals value. |
| associationIdColumn | string | false | | Name of the column that contains the association ID. |
| customMetricId | string | false | | ID of the custom metric to process values for. |
| customMetricTimestampColumn | string | false | | Name of the column that contains custom metric value timestamps. |
| customMetricTimestampFormat | string | false | | Format of timestamps from customMetricTimestampColumn. |
| customMetricValueColumn | string | false | | Name of the column that contains values for the custom metric. |
| monitoredStatusColumn | string | false | | Column name used to mark monitored rows. |
| predictionsColumns | any | false | | Name of the column(s) which contain prediction values. |

oneOf

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | [PredictionColumMap] | false | maxItems: 100 | Map containing column name(s) and class name(s) for a multiclass problem. |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | string | false | | Column name that contains the prediction for a regression problem. |

continued

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| reportDrift | boolean | false | | True to report drift, False otherwise. |
| reportPredictions | boolean | false | | True to report predictions, False otherwise. |
| uniqueRowIdentifierColumns | [string] | false | maxItems: 100 | Name(s) of the column(s) containing unique row identifiers. |
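A hypothetical column mapping for a multiclass deployment might look like the sketch below; the column and class names are placeholders, and for a regression model `predictionsColumns` would instead be a single column name.

```python
# Mapping matching MonitoringColumnsMapping. predictionsColumns accepts either
# a list of {className, columnName} pairs (multiclass) or one column name
# (regression).
columns_mapping = {
    "associationIdColumn": "row_id",
    "predictionsColumns": [
        {"className": "churn", "columnName": "churn_probability"},
        {"className": "retain", "columnName": "retain_probability"},
    ],
    "actualsValueColumn": "observed_outcome",
    "actualsTimestampColumn": "observed_at",
    "reportPredictions": True,
    "reportDrift": True,
}
```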
MonitoringOutputSettings
{
"description": "Output settings for monitoring jobs",
"properties": {
"monitoredStatusColumn": {
"description": "Column name used to mark monitored rows.",
"type": "string"
},
"uniqueRowIdentifierColumns": {
"description": "Column(s) name of unique row identifiers.",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array"
}
},
"required": [
"monitoredStatusColumn",
"uniqueRowIdentifierColumns"
],
"type": "object"
}
Output settings for monitoring jobs
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| monitoredStatusColumn | string | true | | Column name used to mark monitored rows. |
| uniqueRowIdentifierColumns | [string] | true | maxItems: 100 | Name(s) of the column(s) containing unique row identifiers. |
OAuthCredentials
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'oauth' here.",
"enum": [
"oauth"
],
"type": "string"
},
"oauthAccessToken": {
"default": null,
"description": "The oauth access token.",
"type": [
"string",
"null"
]
},
"oauthClientId": {
"default": null,
"description": "The oauth client ID.",
"type": [
"string",
"null"
]
},
"oauthClientSecret": {
"default": null,
"description": "The oauth client secret.",
"type": [
"string",
"null"
]
},
"oauthRefreshToken": {
"description": "The oauth refresh token.",
"type": "string"
}
},
"required": [
"credentialType",
"oauthRefreshToken"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| credentialType | string | true | | The type of these credentials, 'oauth' here. |
| oauthAccessToken | string,null | false | | The oauth access token. |
| oauthClientId | string,null | false | | The oauth client ID. |
| oauthClientSecret | string,null | false | | The oauth client secret. |
| oauthRefreshToken | string | true | | The oauth refresh token. |

Enumerated Values

| Property | Value |
|---|---|
| credentialType | oauth |
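When passing credentials inline rather than by ID, an OAuth payload needs only the two required fields. A sketch, with the environment variable names being arbitrary placeholders:

```python
import os

# Inline credentials matching OAuthCredentials: credentialType and
# oauthRefreshToken are required; the remaining fields may be omitted or null.
credential_data = {
    "credentialType": "oauth",
    "oauthRefreshToken": os.environ["EXAMPLE_OAUTH_REFRESH_TOKEN"],
    "oauthClientId": os.environ.get("EXAMPLE_OAUTH_CLIENT_ID"),
    "oauthClientSecret": os.environ.get("EXAMPLE_OAUTH_CLIENT_SECRET"),
}
```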
PasswordCredentials
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| catalogVersionId | string | false | | The ID of the latest version of the catalog entry. |
| password | string | true | | The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored. |
| url | string | false | | The link to retrieve more detailed information about the entity that uses this catalog dataset. |
| user | string | true | | The username for database authentication. |
PerNgramTextExplanations
{
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| isUnknown | boolean | true | | Whether the ngram is identifiable by the blueprint or not. |
| ngrams | [PredictionExplanationsPredictionValues] | true | maxItems: 1000 | List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information. |
| qualitativateStrength | string | true | | A human-readable description of how strongly these ngrams affected the prediction (e.g. '+++', '--', '+', '<+', '<-'). |
| strength | number | true | | The amount these ngrams affected the prediction. |
PredictJobDetailsResponse
{
"properties": {
"id": {
"description": "the job ID of the job",
"type": "string"
},
"isBlocked": {
"description": "True if a job is waiting for its dependencies to be resolved first.",
"type": "boolean"
},
"message": {
"description": "An optional message about the job",
"type": "string"
},
"modelId": {
"description": "The ID of the model",
"type": "string"
},
"projectId": {
"description": "the project the job belongs to",
"type": "string"
},
"status": {
"description": "the status of the job",
"enum": [
"queue",
"inprogress",
"error",
"ABORTED",
"COMPLETED"
],
"type": "string"
}
},
"required": [
"id",
"isBlocked",
"message",
"modelId",
"projectId",
"status"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| id | string | true | | The ID of the job |
| isBlocked | boolean | true | | True if a job is waiting for its dependencies to be resolved first. |
| message | string | true | | An optional message about the job |
| modelId | string | true | | The ID of the model |
| projectId | string | true | | The project the job belongs to |
| status | string | true | | The status of the job |

Enumerated Values

| Property | Value |
|---|---|
| status | [queue, inprogress, error, ABORTED, COMPLETED] |
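The shape above lends itself to a small status helper. The sketch below only interprets a payload shaped like PredictJobDetailsResponse; the example IDs are placeholders and no endpoint call is shown.

```python
# Statuses after which a predict job will not change any further.
TERMINAL_STATUSES = {"error", "ABORTED", "COMPLETED"}


def summarize_predict_job(details: dict) -> str:
    """Produce a one-line summary of a PredictJobDetailsResponse payload."""
    status = details["status"]
    if details["isBlocked"]:
        return f"job {details['id']} is blocked waiting on its dependencies ({status})"
    if status in TERMINAL_STATUSES:
        return f"job {details['id']} finished with status {status}: {details['message'] or 'no message'}"
    return f"job {details['id']} for model {details['modelId']} is {status}"


example = {
    "id": "42",                               # placeholder job ID
    "isBlocked": False,
    "message": "",
    "modelId": "5e4bc5b35e6e763beb488dba",    # placeholder model ID
    "projectId": "5e4bc5b84e73e6713ca8343c",  # placeholder project ID
    "status": "inprogress",
}
print(summarize_predict_job(example))
# -> "job 42 for model 5e4bc5b35e6e763beb488dba is inprogress"
```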
PredictionArrayObjectValues
{
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
}
Predicted values
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| label | any | true | | For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class. |

oneOf

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | string | false | | none |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | number | false | | none |

continued

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| threshold | number | false | maximum: 1, minimum: 0 | Threshold used in multilabel classification for this class. |
| value | number | true | | The predicted probability of the class identified by the label. |
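As an illustration of how these entries are consumed, the sketch below selects the labels whose probability clears the per-class threshold (the multilabel case). The 0.5 fallback for entries without a threshold is an assumption, and the sample values are invented.

```python
# Select labels from entries shaped like PredictionArrayObjectValues.
def labels_over_threshold(prediction_values: list[dict]) -> list:
    return [
        entry["label"]
        for entry in prediction_values
        if entry["value"] >= entry.get("threshold", 0.5)  # assumed default cutoff
    ]


values = [
    {"label": "sports", "value": 0.91, "threshold": 0.6},
    {"label": "politics", "value": 0.40, "threshold": 0.3},
    {"label": "weather", "value": 0.05, "threshold": 0.5},
]
print(labels_over_threshold(values))  # ['sports', 'politics']
```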
PredictionColumMap
{
"properties": {
"className": {
"description": "Class name.",
"type": "string"
},
"columnName": {
"description": "Column name that contains the prediction for a specific class.",
"type": "string"
}
},
"required": [
"className",
"columnName"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| className | string | true | | Class name. |
| columnName | string | true | | Column name that contains the prediction for a specific class. |
PredictionDataSource
{
"properties": {
"actualValueColumn": {
"description": "The actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset.",
"type": "string",
"x-versionadded": "v2.21"
},
"credentialData": {
"description": "The credentials to authenticate with the database, to use instead of user/password or credential ID.",
"oneOf": [
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'basic' here.",
"enum": [
"basic"
],
"type": "string"
},
"password": {
"description": "The password for database authentication. The password is encrypted at rest and never saved / stored.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"credentialType",
"password",
"user"
],
"type": "object"
},
{
"properties": {
"awsAccessKeyId": {
"description": "The S3 AWS access key ID. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSecretAccessKey": {
"description": "The S3 AWS secret access key. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSessionToken": {
"default": null,
"description": "The S3 AWS session token for AWS temporary credentials.Cannot include this parameter if configId is specified.",
"type": [
"string",
"null"
]
},
"configId": {
"description": "ID of Secure configurations of credentials shared by admin.If specified, cannot include awsAccessKeyId, awsSecretAccessKey or awsSessionToken",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 's3' here.",
"enum": [
"s3"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'oauth' here.",
"enum": [
"oauth"
],
"type": "string"
},
"oauthAccessToken": {
"default": null,
"description": "The oauth access token.",
"type": [
"string",
"null"
]
},
"oauthClientId": {
"default": null,
"description": "The oauth client ID.",
"type": [
"string",
"null"
]
},
"oauthClientSecret": {
"default": null,
"description": "The oauth client secret.",
"type": [
"string",
"null"
]
},
"oauthRefreshToken": {
"description": "The oauth refresh token.",
"type": "string"
}
},
"required": [
"credentialType",
"oauthRefreshToken"
],
"type": "object"
}
],
"x-versionadded": "v2.23"
},
"credentialId": {
"description": "The credential ID to use for database authentication.",
"type": "string",
"x-versionadded": "v2.19"
},
"credentials": {
"description": "A list of credentials for the secondary datasets used in feature discovery project.",
"items": {
"oneOf": [
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
},
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
]
},
"maxItems": 30,
"type": "array",
"x-versionadded": "v2.19"
},
"dataSourceId": {
"description": "The ID of ``DataSource``.",
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored. DEPRECATED: please use ``credentialId`` or ``credentialData`` instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
},
"predictionsEndDate": {
"description": "The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "For time series projects only. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. This value is optional. If omitted or false, missing values are not allowed.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"secondaryDatasetsConfigId": {
"description": "For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
},
"useKerberos": {
"default": false,
"description": "If true, use kerberos authentication for database authentication. Default is false.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"user": {
"description": "The username for database authentication. DEPRECATED: please use ``credentialId`` or ``credentialData`` instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
}
},
"required": [
"dataSourceId"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| actualValueColumn | string | false | | The actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as a bulk predictions dataset. |
| credentialData | any | false | | The credentials to authenticate with the database, to use instead of user/password or credential ID. |

oneOf

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | BasicCredentials | false | | none |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | S3Credentials | false | | none |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | OAuthCredentials | false | | none |

continued

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| credentialId | string | false | | The credential ID to use for database authentication. |
| credentials | [oneOf] | false | maxItems: 30 | A list of credentials for the secondary datasets used in feature discovery project. |

oneOf

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | PasswordCredentials | false | | none |

xor

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| » anonymous | CredentialId | false | | none |

continued

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| dataSourceId | string | true | | The ID of DataSource. |
| forecastPoint | string(date-time) | false | | For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error. |
| password | string | false | | The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored. DEPRECATED: please use credentialId or credentialData instead. |
| predictionsEndDate | string(date-time) | false | | The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsStartDate, and cannot be provided with the forecastPoint parameter. |
| predictionsStartDate | string(date-time) | false | | The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsEndDate, and cannot be provided with the forecastPoint parameter. |
| relaxKnownInAdvanceFeaturesCheck | boolean | false | | For time series projects only. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. This value is optional. If omitted or false, missing values are not allowed. |
| secondaryDatasetsConfigId | string | false | | For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction. |
| useKerberos | boolean | false | | If true, use Kerberos authentication for database authentication. Default is false. |
| user | string | false | | The username for database authentication. DEPRECATED: please use credentialId or credentialData instead. |
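A request body following this schema might look like the sketch below for a time series project scoring from a stored data source. Only dataSourceId is required; the IDs are placeholders, and the endpoint that accepts this body is not documented in this section.

```python
# Body matching PredictionDataSource: credentialId replaces the deprecated
# user/password fields, and forecastPoint applies to time series projects only.
prediction_data_source = {
    "dataSourceId": "5e4bc5b35e6e763beb488dba",  # placeholder DataSource ID
    "credentialId": "5e4bc5555e6e763beb9db147",  # placeholder credential ID
    "forecastPoint": "2016-06-09T00:00:00Z",
    "relaxKnownInAdvanceFeaturesCheck": True,
}
```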
PredictionDatasetListControllerResponse
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"minimum": 0,
"type": "integer"
},
"data": {
"description": "Each has the same schema as if retrieving the dataset individually from [GET /api/v2/projects/{projectId}/predictionDatasets/{datasetId}/][get-apiv2projectsprojectidpredictiondatasetsdatasetid]",
"items": {
"properties": {
"actualValueColumn": {
"description": "Optional, only available for unsupervised projects, in case dataset was uploaded with actual value column specified. Name of the column which will be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogId": {
"description": "The ID of the AI catalog entry used to create the prediction, dataset or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogVersionId": {
"description": "The ID of the AI catalog version used to create the prediction dataset, or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"containsTargetValues": {
"description": "If True, dataset contains target values and can be used to calculate the classification metrics and insights. Only applies for supervised projects.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.21"
},
"created": {
"description": "The date string of when the dataset was created, of the format`YYYY-mm-ddTHH:MM:SS.ssssssZ`, like ``2016-06-09T11:32:34.170338Z``.",
"format": "date-time",
"type": "string"
},
"dataEndDate": {
"description": "Only available for time series projects, a date string representing the maximum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"dataQualityWarnings": {
"description": "A Json object of available warnings about potential problems in this prediction dataset. Empty if no warnings.",
"properties": {
"hasKiaMissingValuesInForecastWindow": {
"description": "If true, known-in-advance features in this dataset have missing values in the forecast window. Absence of the known-in-advance values can negatively impact prediction quality. Only applies for time series projects.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"insufficientRowsForEvaluatingModels": {
"description": "If true, the dataset has a target column present indicating it can be used to evaluate model performance but too few rows to be trustworthy in so doing. If false, either it has no target column at all or it has sufficient rows for model evaluation. Only applies for regression, binary classification, multiclass classification projects and time series unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"singleClassActualValueColumn": {
"description": "If true, actual value column has only one class and such insights as ROC curve can not be calculated. Only applies for binary classification projects or unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.21"
}
},
"type": "object"
},
"dataStartDate": {
"description": "Only available for time series projects, a date string representing the minimum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"detectedActualValueColumns": {
"description": "Only available for unsupervised projects, a list of detected `actualValueColumnInfo` objects which can be used to calculate the classification metrics and insights.",
"items": {
"properties": {
"missingCount": {
"description": "Count of the missing values in the column.",
"type": "integer",
"x-versionadded": "v2.21"
},
"name": {
"description": "Name of the column.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"missingCount",
"name"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "The date string of the forecastPoint of this prediction dataset. Only non-null for time series projects.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.8"
},
"forecastPointRange": {
"description": "Only available for time series projects, the start and end of the range of dates available for use as the forecast point, detected based on the uploaded prediction dataset.",
"items": {
"description": "Date string of a forecast point.",
"format": "date-time",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.20"
},
"id": {
"description": "The ID of this dataset.",
"type": "string"
},
"maxForecastDate": {
"description": "Only available for time series projects, a date string representing the maximum forecast date of this prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"name": {
"description": "The name of the dataset when it was uploaded.",
"type": "string"
},
"numColumns": {
"description": "The number of columns in this dataset.",
"type": "integer"
},
"numRows": {
"description": "The number of rows in this dataset.",
"type": "integer"
},
"predictionsEndDate": {
"description": "The date string of the prediction end date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"predictionsStartDate": {
"description": "The date string of the prediction start date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The project ID that owns this dataset.",
"type": "string"
},
"secondaryDatasetsConfigId": {
"description": "Only available for Feature discovery projects. Id of the secondary dataset config used by the dataset for the prediction.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"catalogId",
"catalogVersionId",
"created",
"dataQualityWarnings",
"forecastPoint",
"id",
"name",
"numColumns",
"numRows",
"predictionsEndDate",
"predictionsStartDate",
"projectId"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "A URL pointing to the next page (if `null`, there is no next page).",
"type": [
"string",
"null"
]
},
"previous": {
"description": "A URL pointing to the previous page (if `null`, there is no previous page).",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| count | integer | true | minimum: 0 | The number of items returned on this page. |
| data | [PredictionDatasetRetrieveResponse] | true | | Each has the same schema as if retrieving the dataset individually from GET /api/v2/projects/{projectId}/predictionDatasets/{datasetId}/ |
| next | string,null | true | | A URL pointing to the next page (if null, there is no next page). |
| previous | string,null | true | | A URL pointing to the previous page (if null, there is no previous page). |
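Listings of this shape are paginated through next/previous. A sketch that walks every page, assuming the listing lives at GET /api/v2/projects/{projectId}/predictionDatasets/ (the per-dataset path referenced above, without the dataset ID) and an API token in DATAROBOT_API_TOKEN:

```python
import os
from typing import Iterator

import requests

API_ROOT = "https://app.datarobot.com/api/v2"  # assumed installation URL
HEADERS = {"Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}"}


def iter_prediction_datasets(project_id: str) -> Iterator[dict]:
    """Yield every dataset from a paginated PredictionDatasetListControllerResponse."""
    url = f"{API_ROOT}/projects/{project_id}/predictionDatasets/"  # assumed listing path
    while url is not None:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        page = response.json()
        yield from page["data"]
        url = page["next"]  # None on the last page
```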
PredictionDatasetRetrieveResponse
{
"properties": {
"actualValueColumn": {
"description": "Optional, only available for unsupervised projects, in case dataset was uploaded with actual value column specified. Name of the column which will be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogId": {
"description": "The ID of the AI catalog entry used to create the prediction, dataset or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"catalogVersionId": {
"description": "The ID of the AI catalog version used to create the prediction dataset, or None if not created from the AI catalog.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"containsTargetValues": {
"description": "If True, dataset contains target values and can be used to calculate the classification metrics and insights. Only applies for supervised projects.",
"type": [
"boolean",
"null"
],
"x-versionadded": "v2.21"
},
"created": {
"description": "The date string of when the dataset was created, of the format`YYYY-mm-ddTHH:MM:SS.ssssssZ`, like ``2016-06-09T11:32:34.170338Z``.",
"format": "date-time",
"type": "string"
},
"dataEndDate": {
"description": "Only available for time series projects, a date string representing the maximum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"dataQualityWarnings": {
"description": "A Json object of available warnings about potential problems in this prediction dataset. Empty if no warnings.",
"properties": {
"hasKiaMissingValuesInForecastWindow": {
"description": "If true, known-in-advance features in this dataset have missing values in the forecast window. Absence of the known-in-advance values can negatively impact prediction quality. Only applies for time series projects.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"insufficientRowsForEvaluatingModels": {
"description": "If true, the dataset has a target column present indicating it can be used to evaluate model performance but too few rows to be trustworthy in so doing. If false, either it has no target column at all or it has sufficient rows for model evaluation. Only applies for regression, binary classification, multiclass classification projects and time series unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"singleClassActualValueColumn": {
"description": "If true, actual value column has only one class and such insights as ROC curve can not be calculated. Only applies for binary classification projects or unsupervised projects.",
"type": "boolean",
"x-versionadded": "v2.21"
}
},
"type": "object"
},
"dataStartDate": {
"description": "Only available for time series projects, a date string representing the minimum primary date of the prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"detectedActualValueColumns": {
"description": "Only available for unsupervised projects, a list of detected `actualValueColumnInfo` objects which can be used to calculate the classification metrics and insights.",
"items": {
"properties": {
"missingCount": {
"description": "Count of the missing values in the column.",
"type": "integer",
"x-versionadded": "v2.21"
},
"name": {
"description": "Name of the column.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"missingCount",
"name"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "The date string of the forecastPoint of this prediction dataset. Only non-null for time series projects.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.8"
},
"forecastPointRange": {
"description": "Only available for time series projects, the start and end of the range of dates available for use as the forecast point, detected based on the uploaded prediction dataset.",
"items": {
"description": "Date string of a forecast point.",
"format": "date-time",
"type": "string"
},
"type": "array",
"x-versionadded": "v2.20"
},
"id": {
"description": "The ID of this dataset.",
"type": "string"
},
"maxForecastDate": {
"description": "Only available for time series projects, a date string representing the maximum forecast date of this prediction dataset.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.20"
},
"name": {
"description": "The name of the dataset when it was uploaded.",
"type": "string"
},
"numColumns": {
"description": "The number of columns in this dataset.",
"type": "integer"
},
"numRows": {
"description": "The number of rows in this dataset.",
"type": "integer"
},
"predictionsEndDate": {
"description": "The date string of the prediction end date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"predictionsStartDate": {
"description": "The date string of the prediction start date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"projectId": {
"description": "The project ID that owns this dataset.",
"type": "string"
},
"secondaryDatasetsConfigId": {
"description": "Only available for Feature discovery projects. Id of the secondary dataset config used by the dataset for the prediction.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"catalogId",
"catalogVersionId",
"created",
"dataQualityWarnings",
"forecastPoint",
"id",
"name",
"numColumns",
"numRows",
"predictionsEndDate",
"predictionsStartDate",
"projectId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string,null |
false |
|
Optional, only available for unsupervised projects, in case dataset was uploaded with actual value column specified. Name of the column which will be used to calculate the classification metrics and insights. |
catalogId |
string,null |
true |
|
The ID of the AI catalog entry used to create the prediction, dataset or None if not created from the AI catalog. |
catalogVersionId |
string,null |
true |
|
The ID of the AI catalog version used to create the prediction dataset, or None if not created from the AI catalog. |
containsTargetValues |
boolean,null |
false |
|
If True, dataset contains target values and can be used to calculate the classification metrics and insights. Only applies for supervised projects. |
created |
string(date-time) |
true |
|
The date string of when the dataset was created, of the formatYYYY-mm-ddTHH:MM:SS.ssssssZ , like 2016-06-09T11:32:34.170338Z . |
dataEndDate |
string(date-time) |
false |
|
Only available for time series projects, a date string representing the maximum primary date of the prediction dataset. |
dataQualityWarnings |
DataQualityWarningsRecord |
true |
|
A JSON object of available warnings about potential problems in this prediction dataset. Empty if no warnings. |
dataStartDate |
string(date-time) |
false |
|
Only available for time series projects, a date string representing the minimum primary date of the prediction dataset. |
detectedActualValueColumns |
[ActualValueColumnInfo] |
false |
|
Only available for unsupervised projects, a list of detected actualValueColumnInfo objects which can be used to calculate the classification metrics and insights. |
forecastPoint |
string,null |
true |
|
The date string of the forecastPoint of this prediction dataset. Only non-null for time series projects. |
forecastPointRange |
[string] |
false |
|
Only available for time series projects, the start and end of the range of dates available for use as the forecast point, detected based on the uploaded prediction dataset. |
id |
string |
true |
|
The ID of this dataset. |
maxForecastDate |
string(date-time) |
false |
|
Only available for time series projects, a date string representing the maximum forecast date of this prediction dataset. |
name |
string |
true |
|
The name of the dataset when it was uploaded. |
numColumns |
integer |
true |
|
The number of columns in this dataset. |
numRows |
integer |
true |
|
The number of rows in this dataset. |
predictionsEndDate |
string,null(date-time) |
true |
|
The date string of the prediction end date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects. |
predictionsStartDate |
string,null(date-time) |
true |
|
The date string of the prediction start date of this prediction dataset. Used for bulk predictions. Note that this parameter is for generating historical predictions using the training data. Only non-null for time series projects. |
projectId |
string |
true |
|
The project ID that owns this dataset. |
secondaryDatasetsConfigId |
string |
false |
|
Only available for Feature discovery projects. Id of the secondary dataset config used by the dataset for the prediction. |
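For time series projects, forecastPoint, forecastPointRange, dataStartDate, dataEndDate and maxForecastDate together describe the window a prediction dataset can serve. A minimal Python sketch of validating a candidate forecast point against a dataset record shaped like the schema above (the helper names are illustrative, not part of the API):

from datetime import datetime

def parse_iso(value):
    # The API uses ISO-8601 strings such as 2016-06-09T11:32:34.170338Z.
    return datetime.fromisoformat(value.replace("Z", "+00:00"))

def validate_forecast_point(dataset_record, forecast_point):
    # forecastPointRange is only present for time series projects.
    point_range = dataset_record.get("forecastPointRange")
    if not point_range:
        raise ValueError("forecastPointRange is only available for time series projects")
    start, end = (parse_iso(value) for value in point_range)
    return start <= parse_iso(forecast_point) <= end

For example, validate_forecast_point(record, "2016-06-09T11:32:34Z") returns True only when the point falls inside the detected range.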
PredictionExplanation
{
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21).",
"type": "string"
},
"imageExplanationUrl": {
"description": "For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"perNgramTextExplanations": {
"description": "For text features, an array of JSON object containing the per ngram based text prediction explanations.",
"items": {
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"maxItems": 10000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'.",
"type": "string"
},
"strength": {
"description": "The amount this feature's value affected the prediction.",
"type": "number"
}
},
"required": [
"feature",
"featureValue",
"imageExplanationUrl",
"label",
"qualitativateStrength",
"strength"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
feature |
string |
true |
|
The name of the feature contributing to the prediction. |
featureValue |
string |
true |
|
The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21). |
imageExplanationUrl |
string,null |
true |
|
For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null. |
label |
string |
true |
|
Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score. |
perNgramTextExplanations |
[PerNgramTextExplanations] |
false |
maxItems: 10000
|
For text features, an array of JSON objects containing the per-ngram text prediction explanations. |
qualitativateStrength |
string |
true |
|
A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'. |
strength |
number |
true |
|
The amount this feature's value affected the prediction. |
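As a reading aid for this schema, the sketch below renders one PredictionExplanation object as a single line, including any per-ngram text explanations; the field names come from the schema above, while the formatting itself is illustrative:

def summarize_explanation(expl):
    # Core fields are required on every explanation.
    line = (
        f"{expl['feature']}={expl['featureValue']!r} "
        f"{expl['qualitativateStrength']} (strength={expl['strength']:+.4f}) "
        f"-> {expl['label']}"
    )
    # perNgramTextExplanations is optional and only populated for text features.
    for text_expl in expl.get("perNgramTextExplanations", []):
        flag = "unknown" if text_expl["isUnknown"] else "known"
        line += (f" | {len(text_expl['ngrams'])} ngram(s), {flag}, "
                 f"{text_expl['qualitativateStrength']}")
    return line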
PredictionExplanationsCreate
{
"properties": {
"classNames": {
"description": "List of class names that will be explained for each row for multiclass. Mutually exclusive with numTopClasses. If neither specified - we assume numTopClasses=1.",
"items": {
"type": "string"
},
"maxItems": 10,
"type": "array",
"x-versionadded": "v2.29"
},
"datasetId": {
"description": "The dataset ID.",
"type": "string"
},
"maxExplanations": {
"default": 3,
"description": "The maximum number of prediction explanations to supply per row of the dataset.",
"maximum": 10,
"minimum": 0,
"type": "integer"
},
"modelId": {
"description": "The model ID.",
"type": "string"
},
"numTopClasses": {
"description": "Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with classNames. If neither specified - we assume numTopClasses=1.",
"maximum": 10,
"minimum": 1,
"type": "integer",
"x-versionadded": "v2.29"
},
"thresholdHigh": {
"default": null,
"description": "The high threshold, above which a prediction must score in order for prediction explanations to be computed. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"default": null,
"description": "The lower threshold, below which a prediction must score in order for prediction explanations to be computed for a row in the dataset. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
}
},
"required": [
"datasetId",
"modelId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
classNames |
[string] |
false |
maxItems: 10
|
List of class names that will be explained for each row for multiclass. Mutually exclusive with numTopClasses. If neither specified - we assume numTopClasses=1. |
datasetId |
string |
true |
|
The dataset ID. |
maxExplanations |
integer |
false |
maximum: 10 minimum: 0
|
The maximum number of prediction explanations to supply per row of the dataset. |
modelId |
string |
true |
|
The model ID. |
numTopClasses |
integer |
false |
maximum: 10 minimum: 1
|
Number of top predicted classes for each row that will be explained for multiclass. Mutually exclusive with classNames. If neither specified - we assume numTopClasses=1. |
thresholdHigh |
number,null |
false |
|
The high threshold, above which a prediction must score in order for prediction explanations to be computed. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows. |
thresholdLow |
number,null |
false |
|
The lower threshold, below which a prediction must score in order for prediction explanations to be computed for a row in the dataset. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows. |
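A minimal request sketch for this payload. The body fields follow the schema above; the host, the Authorization header, and the POST /api/v2/projects/{projectId}/predictionExplanations/ route are assumptions, since this section only documents the payload:

import requests

API = "https://app.example.com/api/v2"                 # assumed host
HEADERS = {"Authorization": "Bearer YOUR_API_TOKEN"}   # assumed auth scheme

def request_prediction_explanations(project_id, model_id, dataset_id,
                                    max_explanations=3,
                                    threshold_low=None, threshold_high=None):
    # Build a PredictionExplanationsCreate body; null thresholds mean "explain all rows".
    payload = {
        "modelId": model_id,
        "datasetId": dataset_id,
        "maxExplanations": max_explanations,   # 0-10 per the schema
        "thresholdLow": threshold_low,
        "thresholdHigh": threshold_high,
    }
    resp = requests.post(f"{API}/projects/{project_id}/predictionExplanations/",
                         json=payload, headers=HEADERS)
    resp.raise_for_status()
    # Asynchronous creation is assumed to return a job URL in the Location header.
    return resp.headers.get("Location")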
PredictionExplanationsInitializationCreate
{
"properties": {
"maxExplanations": {
"default": 3,
"description": "The maximum number of prediction explanations to supply per row of the dataset.",
"maximum": 10,
"minimum": 1,
"type": "integer"
},
"thresholdHigh": {
"default": null,
"description": "The high threshold, above which a prediction must score in order for prediction explanations to be computed. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"default": null,
"description": "The lower threshold, below which a prediction must score in order for prediction explanations to be computed for a row in the dataset. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows.",
"type": [
"number",
"null"
]
}
},
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
maxExplanations |
integer |
false |
maximum: 10 minimum: 1
|
The maximum number of prediction explanations to supply per row of the dataset. |
thresholdHigh |
number,null |
false |
|
The high threshold, above which a prediction must score in order for prediction explanations to be computed. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows. |
thresholdLow |
number,null |
false |
|
The lower threshold, below which a prediction must score in order for prediction explanations to be computed for a row in the dataset. If neither thresholdHigh nor thresholdLow is specified, prediction explanations will be computed for all rows. |
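The initialization body is a strict subset of the create body; a small sketch of assembling it, omitting unset thresholds so the server defaults apply (the helper name is illustrative):

def initialization_payload(max_explanations=3, threshold_low=None, threshold_high=None):
    # PredictionExplanationsInitializationCreate: all fields are optional.
    body = {"maxExplanations": max_explanations}   # 1-10 per the schema
    if threshold_low is not None:
        body["thresholdLow"] = threshold_low
    if threshold_high is not None:
        body["thresholdHigh"] = threshold_high
    return body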
PredictionExplanationsInitializationRetrieve
{
"properties": {
"modelId": {
"description": "The model ID.",
"type": "string"
},
"predictionExplanationsSample": {
"description": "Each is a PredictionExplanationsRow. They represent a small sample of prediction explanations that could be generated for a particular dataset. They will have the same schema as the `data` array in the response from [GET /api/v2/projects/{projectId}/predictionExplanations/{predictionExplanationsId}/][get-apiv2projectsprojectidpredictionexplanationspredictionexplanationsid]. As of v2.21 only difference is that there is no forecastPoint in response for time series projects.",
"items": {
"properties": {
"adjustedPrediction": {
"description": "The exposure-adjusted output of the model for this row.",
"type": "number",
"x-versionadded": "v2.8"
},
"adjustedPredictionValues": {
"description": "The exposure-adjusted output of the model for this row.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.8"
},
"forecastDistance": {
"description": "Forecast distance for the row. For time series projects only.",
"type": "integer",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "Forecast point for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
},
"prediction": {
"description": "The output of the model for this row.",
"type": "number"
},
"predictionExplanations": {
"description": "A list of prediction explanations.",
"items": {
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21).",
"type": "string"
},
"imageExplanationUrl": {
"description": "For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"perNgramTextExplanations": {
"description": "For text features, an array of JSON object containing the per ngram based text prediction explanations.",
"items": {
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"maxItems": 10000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'.",
"type": "string"
},
"strength": {
"description": "The amount this feature's value affected the prediction.",
"type": "number"
}
},
"required": [
"feature",
"featureValue",
"imageExplanationUrl",
"label",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"type": "array"
},
"predictionThreshold": {
"description": "The threshold value used for classification prediction.",
"type": [
"number",
"null"
]
},
"predictionValues": {
"description": "A list of prediction values.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "Which row this PredictionExplanationsRow describes.",
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for the row in a multiseries project. For a single series project this will be null. For time series projects only.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"timestamp": {
"description": "Timestamp for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"adjustedPrediction",
"adjustedPredictionValues",
"forecastDistance",
"forecastPoint",
"prediction",
"predictionExplanations",
"predictionThreshold",
"predictionValues",
"rowId",
"seriesId",
"timestamp"
],
"type": "object"
},
"type": "array"
},
"projectId": {
"description": "The project ID.",
"type": "string"
}
},
"required": [
"modelId",
"predictionExplanationsSample",
"projectId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
modelId |
string |
true |
|
The model ID. |
predictionExplanationsSample |
[PredictionExplanationsRow] |
true |
|
Each is a PredictionExplanationsRow. They represent a small sample of prediction explanations that could be generated for a particular dataset. They will have the same schema as the data array in the response from GET /api/v2/projects/{projectId}/predictionExplanations/{predictionExplanationsId}/. As of v2.21, the only difference is that there is no forecastPoint in the response for time series projects. |
projectId |
string |
true |
|
The project ID. |
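Because predictionExplanationsSample rows share the schema of the full retrieval endpoint, the same row-handling code can be reused to preview explanations before running them over an entire dataset. A sketch over a response shaped like the schema above (the output formatting is illustrative):

def preview_initialization(init):
    # init is a PredictionExplanationsInitializationRetrieve response body.
    print(f"model {init['modelId']} in project {init['projectId']}")
    for row in init["predictionExplanationsSample"]:
        explanations = row["predictionExplanations"]
        if not explanations:
            continue
        top = max(explanations, key=lambda e: abs(e["strength"]))
        print(f"row {row['rowId']}: prediction={row['prediction']}, "
              f"top driver {top['feature']} ({top['qualitativateStrength']})")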
{
"description": "Prediction explanation metadata.",
"properties": {
"shapRemainingTotal": {
"description": "Will be present only if `explanationAlgorithm` = 'shap' and `maxExplanations` is nonzero. The total of SHAP values for features beyond the `maxExplanations`. This can be identically 0 in all rows, if `maxExplanations` is greater than the number of features and thus all features are returned.",
"type": "integer"
}
},
"type": "object"
}
Prediction explanation metadata.
Properties
Name |
Type |
Required |
Restrictions |
Description |
shapRemainingTotal |
integer |
false |
|
Will be present only if explanationAlgorithm = 'shap' and maxExplanations is nonzero. The total of SHAP values for features beyond the maxExplanations . This can be identically 0 in all rows, if maxExplanations is greater than the number of features and thus all features are returned. |
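When explanationAlgorithm is 'shap', the returned strengths plus shapRemainingTotal account for every feature, so under SHAP's additivity property they should reconstruct the prediction once combined with the model's base value. A sketch of checking that; the base value is assumed to be available from the SHAP run's metadata and is not part of the schemas shown here:

def shap_reconstruction_gap(row, base_value, shap_remaining_total=0):
    # row follows PredictionExplanationsRow; strengths here are SHAP values.
    explained = sum(e["strength"] or 0 for e in row["predictionExplanations"])
    return abs(row["prediction"] - (base_value + explained + shap_remaining_total))

A gap near zero is expected when SHAP values are reported in the same units as the prediction; some models explain in a transformed (link) space, in which case the comparison must be done in that space.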
PredictionExplanationsObject
{
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
}
Prediction explanation result.
Properties
Name |
Type |
Required |
Restrictions |
Description |
feature |
string |
true |
|
The name of the feature contributing to the prediction. |
featureValue |
any |
true |
|
The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.). |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
boolean |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
number |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
label |
any |
true |
|
Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the Anomaly Score . |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
number |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
strength |
number,null |
false |
|
Algorithm-specific explanation value attributed to feature in this row. If explanationAlgorithm = shap , this is the SHAP value. |
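Because strength is nullable here, ranking explanations should handle missing values explicitly rather than letting them raise; a small sorting sketch over a list of these objects:

def top_explanations(objects, n=3):
    # Entries with a null strength sort to the end instead of being dropped.
    return sorted(
        objects,
        key=lambda o: abs(o["strength"]) if o["strength"] is not None else -1.0,
        reverse=True,
    )[:n]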
PredictionExplanationsPredictionValues
{
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
label |
string |
true |
|
Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score. |
value |
number |
true |
|
The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label. |
PredictionExplanationsRecord
{
"properties": {
"datasetId": {
"description": "The dataset ID.",
"type": "string"
},
"finishTime": {
"description": "Timestamp referencing when computation for these prediction explanations finished.",
"type": "number"
},
"id": {
"description": "The PredictionExplanationsRecord ID.",
"type": "string"
},
"maxExplanations": {
"description": "The maximum number of codes generated per prediction.",
"type": "integer"
},
"modelId": {
"description": "The model ID.",
"type": "string"
},
"numColumns": {
"description": "The number of columns prediction explanations were computed for.",
"type": "integer"
},
"predictionExplanationsLocation": {
"description": "Where to retrieve the prediction explanations.",
"type": "string"
},
"predictionThreshold": {
"description": "The threshold value used for binary classification prediction.",
"type": [
"number",
"null"
]
},
"projectId": {
"description": "The project ID.",
"type": "string"
},
"thresholdHigh": {
"description": "The prediction explanation high threshold. Predictions must be above this value (or below the thresholdLow value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"description": "The prediction explanation low threshold. Predictions must be below this value (or above the thresholdHigh value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
}
},
"required": [
"datasetId",
"finishTime",
"id",
"maxExplanations",
"modelId",
"numColumns",
"predictionExplanationsLocation",
"predictionThreshold",
"projectId",
"thresholdHigh",
"thresholdLow"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
string |
true |
|
The dataset ID. |
finishTime |
number |
true |
|
Timestamp referencing when computation for these prediction explanations finished. |
id |
string |
true |
|
The PredictionExplanationsRecord ID. |
maxExplanations |
integer |
true |
|
The maximum number of codes generated per prediction. |
modelId |
string |
true |
|
The model ID. |
numColumns |
integer |
true |
|
The number of columns prediction explanations were computed for. |
predictionExplanationsLocation |
string |
true |
|
Where to retrieve the prediction explanations. |
predictionThreshold |
number,null |
true |
|
The threshold value used for binary classification prediction. |
projectId |
string |
true |
|
The project ID. |
thresholdHigh |
number,null |
true |
|
The prediction explanation high threshold. Predictions must be above this value (or below the thresholdLow value) to have PredictionExplanations computed. |
thresholdLow |
number,null |
true |
|
The prediction explanation low threshold. Predictions must be below this value (or above the thresholdHigh value) to have PredictionExplanations computed. |
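The two thresholds on a record describe which rows actually received explanations: a row qualifies when its prediction is above thresholdHigh or below thresholdLow, and a null threshold disables that side of the filter. A small predicate capturing that reading of the schema:

def explanations_computed_for(prediction, record):
    # record follows PredictionExplanationsRecord; both thresholds may be null.
    low, high = record["thresholdLow"], record["thresholdHigh"]
    if low is None and high is None:
        return True  # no thresholds: explanations are computed for every row
    return (high is not None and prediction > high) or (low is not None and prediction < low)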
PredictionExplanationsRecordList
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"minimum": 0,
"type": "integer"
},
"data": {
"description": "Each has the same schema as if retrieving the prediction explanations individually from [GET /api/v2/projects/{projectId}/predictionExplanationsRecords/{predictionExplanationsId}/][get-apiv2projectsprojectidpredictionexplanationsrecordspredictionexplanationsid].",
"items": {
"properties": {
"datasetId": {
"description": "The dataset ID.",
"type": "string"
},
"finishTime": {
"description": "Timestamp referencing when computation for these prediction explanations finished.",
"type": "number"
},
"id": {
"description": "The PredictionExplanationsRecord ID.",
"type": "string"
},
"maxExplanations": {
"description": "The maximum number of codes generated per prediction.",
"type": "integer"
},
"modelId": {
"description": "The model ID.",
"type": "string"
},
"numColumns": {
"description": "The number of columns prediction explanations were computed for.",
"type": "integer"
},
"predictionExplanationsLocation": {
"description": "Where to retrieve the prediction explanations.",
"type": "string"
},
"predictionThreshold": {
"description": "The threshold value used for binary classification prediction.",
"type": [
"number",
"null"
]
},
"projectId": {
"description": "The project ID.",
"type": "string"
},
"thresholdHigh": {
"description": "The prediction explanation high threshold. Predictions must be above this value (or below the thresholdLow value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
},
"thresholdLow": {
"description": "The prediction explanation low threshold. Predictions must be below this value (or above the thresholdHigh value) to have PredictionExplanations computed.",
"type": [
"number",
"null"
]
}
},
"required": [
"datasetId",
"finishTime",
"id",
"maxExplanations",
"modelId",
"numColumns",
"predictionExplanationsLocation",
"predictionThreshold",
"projectId",
"thresholdHigh",
"thresholdLow"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "A URL pointing to the next page (if `null`, there is no next page).",
"type": [
"string",
"null"
]
},
"previous": {
"description": "A URL pointing to the previous page (if `null`, there is no previous page).",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
count |
integer |
true |
minimum: 0
|
The number of items returned on this page. |
data |
[PredictionExplanationsRecord] |
true |
|
Each has the same schema as if retrieving the prediction explanations individually from GET /api/v2/projects/{projectId}/predictionExplanationsRecords/{predictionExplanationsId}/. |
next |
string,null |
true |
|
A URL pointing to the next page (if null, there is no next page). |
previous |
string,null |
true |
|
A URL pointing to the previous page (if null, there is no previous page). |
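This list response is paginated through next and previous. A sketch that walks every page; the per-record route is documented above, while the list route without a record ID, the host, and the headers are assumptions:

import requests

def iter_explanation_records(project_id, api="https://app.example.com/api/v2", headers=None):
    # Assumed list route: the records path without a trailing record ID.
    url = f"{api}/projects/{project_id}/predictionExplanationsRecords/"
    while url:
        page = requests.get(url, headers=headers or {}).json()
        yield from page["data"]
        url = page["next"]   # null (None) when there is no next page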
PredictionExplanationsRetrieve
{
"properties": {
"adjustmentMethod": {
"description": "'exposureNormalized' (for regression projects with exposure) or 'N/A' (for classification projects) The value of 'exposureNormalized' indicates that prediction outputs are adjusted (or divided) by exposure. The value of 'N/A' indicates that no adjustments are applied to the adjusted predictions and they are identical to the unadjusted predictions.",
"type": "string",
"x-versionadded": "v2.8"
},
"count": {
"description": "How many rows of prediction explanations were returned.",
"type": "integer"
},
"data": {
"description": "Each is a PredictionExplanationsRow corresponding to one row of the prediction dataset.",
"items": {
"properties": {
"adjustedPrediction": {
"description": "The exposure-adjusted output of the model for this row.",
"type": "number",
"x-versionadded": "v2.8"
},
"adjustedPredictionValues": {
"description": "The exposure-adjusted output of the model for this row.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.8"
},
"forecastDistance": {
"description": "Forecast distance for the row. For time series projects only.",
"type": "integer",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "Forecast point for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
},
"prediction": {
"description": "The output of the model for this row.",
"type": "number"
},
"predictionExplanations": {
"description": "A list of prediction explanations.",
"items": {
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21).",
"type": "string"
},
"imageExplanationUrl": {
"description": "For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"perNgramTextExplanations": {
"description": "For text features, an array of JSON object containing the per ngram based text prediction explanations.",
"items": {
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"maxItems": 10000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'.",
"type": "string"
},
"strength": {
"description": "The amount this feature's value affected the prediction.",
"type": "number"
}
},
"required": [
"feature",
"featureValue",
"imageExplanationUrl",
"label",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"type": "array"
},
"predictionThreshold": {
"description": "The threshold value used for classification prediction.",
"type": [
"number",
"null"
]
},
"predictionValues": {
"description": "A list of prediction values.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "Which row this PredictionExplanationsRow describes.",
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for the row in a multiseries project. For a single series project this will be null. For time series projects only.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"timestamp": {
"description": "Timestamp for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"adjustedPrediction",
"adjustedPredictionValues",
"forecastDistance",
"forecastPoint",
"prediction",
"predictionExplanations",
"predictionThreshold",
"predictionValues",
"rowId",
"seriesId",
"timestamp"
],
"type": "object"
},
"type": "array"
},
"id": {
"description": "The ID of this group of prediction explanations.",
"type": "string"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"predictionExplanationsRecordLocation": {
"description": "The URL of the PredictionExplanationsRecord associated with these prediction explanations.",
"type": "string"
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"adjustmentMethod",
"count",
"data",
"id",
"next",
"predictionExplanationsRecordLocation",
"previous"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
adjustmentMethod |
string |
true |
|
'exposureNormalized' (for regression projects with exposure) or 'N/A' (for classification projects). The value of 'exposureNormalized' indicates that prediction outputs are adjusted (or divided) by exposure. The value of 'N/A' indicates that no adjustments are applied to the adjusted predictions and they are identical to the unadjusted predictions. |
count |
integer |
true |
|
How many rows of prediction explanations were returned. |
data |
[PredictionExplanationsRow] |
true |
|
Each is a PredictionExplanationsRow corresponding to one row of the prediction dataset. |
id |
string |
true |
|
The ID of this group of prediction explanations. |
next |
string,null(uri) |
true |
|
URL pointing to the next page (if null, there is no next page). |
predictionExplanationsRecordLocation |
string |
true |
|
The URL of the PredictionExplanationsRecord associated with these prediction explanations. |
previous |
string,null(uri) |
true |
|
URL pointing to the previous page (if null, there is no previous page). |
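Retrieving the explanations themselves uses the documented GET /api/v2/projects/{projectId}/predictionExplanations/{predictionExplanationsId}/ route and the same next-based pagination; a sketch that accumulates all rows, with the host and headers assumed as before:

import requests

def fetch_all_explanation_rows(project_id, prediction_explanations_id,
                               api="https://app.example.com/api/v2", headers=None):
    # Follow the paginated PredictionExplanationsRetrieve responses until next is null.
    url = f"{api}/projects/{project_id}/predictionExplanations/{prediction_explanations_id}/"
    rows = []
    while url:
        page = requests.get(url, headers=headers or {}).json()
        rows.extend(page["data"])
        url = page["next"]
    return rows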
PredictionExplanationsRow
{
"properties": {
"adjustedPrediction": {
"description": "The exposure-adjusted output of the model for this row.",
"type": "number",
"x-versionadded": "v2.8"
},
"adjustedPredictionValues": {
"description": "The exposure-adjusted output of the model for this row.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.8"
},
"forecastDistance": {
"description": "Forecast distance for the row. For time series projects only.",
"type": "integer",
"x-versionadded": "v2.21"
},
"forecastPoint": {
"description": "Forecast point for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
},
"prediction": {
"description": "The output of the model for this row.",
"type": "number"
},
"predictionExplanations": {
"description": "A list of prediction explanations.",
"items": {
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. For image features, this value is the URL of the input image (New in v2.21).",
"type": "string"
},
"imageExplanationUrl": {
"description": "For image features, the URL of the image containing the input image overlaid by the activation heatmap. For non-image features, this field is null.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"perNgramTextExplanations": {
"description": "For text features, an array of JSON object containing the per ngram based text prediction explanations.",
"items": {
"properties": {
"isUnknown": {
"description": "Whether the ngram is identifiable by the blueprint or not.",
"type": "boolean",
"x-versionadded": "v2.28"
},
"ngrams": {
"description": "List of JSON objects with the ngram starting index, ngram ending index and unknown ngram information.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"maxItems": 1000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly these ngrams's affected the prediction(e.g. '+++', '--', '+', '<+', '<-').",
"type": "string",
"x-versionadded": "v2.28"
},
"strength": {
"description": "The amount these ngrams's affected the prediction.",
"type": "number",
"x-versionadded": "v2.28"
}
},
"required": [
"isUnknown",
"ngrams",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"maxItems": 10000,
"type": "array",
"x-versionadded": "v2.28"
},
"qualitativateStrength": {
"description": "A human-readable description of how strongly the feature affected the prediction. A large positive effect is denoted '+++', medium '++', small '+', very small '<+'. A large negative effect is denoted '---', medium '--', small '-', very small '<-'.",
"type": "string"
},
"strength": {
"description": "The amount this feature's value affected the prediction.",
"type": "number"
}
},
"required": [
"feature",
"featureValue",
"imageExplanationUrl",
"label",
"qualitativateStrength",
"strength"
],
"type": "object"
},
"type": "array"
},
"predictionThreshold": {
"description": "The threshold value used for classification prediction.",
"type": [
"number",
"null"
]
},
"predictionValues": {
"description": "A list of prediction values.",
"items": {
"properties": {
"label": {
"description": "Describes what this model output corresponds to. For regression projects, it is the name of the target feature. For classification projects, it is a level from the target feature. For Anomaly Detection models it is an Anomaly Score.",
"type": "string"
},
"value": {
"description": "The output of the prediction. For regression projects, it is the predicted value of the target. For classification projects, it is the predicted probability the row belongs to the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "Which row this PredictionExplanationsRow describes.",
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for the row in a multiseries project. For a single series project this will be null. For time series projects only.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"timestamp": {
"description": "Timestamp for the row. For time series projects only.",
"type": "string",
"x-versionadded": "v2.21"
}
},
"required": [
"adjustedPrediction",
"adjustedPredictionValues",
"forecastDistance",
"forecastPoint",
"prediction",
"predictionExplanations",
"predictionThreshold",
"predictionValues",
"rowId",
"seriesId",
"timestamp"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
adjustedPrediction |
number |
true |
|
The exposure-adjusted output of the model for this row. |
adjustedPredictionValues |
[PredictionExplanationsPredictionValues] |
true |
|
The exposure-adjusted output of the model for this row. |
forecastDistance |
integer |
true |
|
Forecast distance for the row. For time series projects only. |
forecastPoint |
string |
true |
|
Forecast point for the row. For time series projects only. |
prediction |
number |
true |
|
The output of the model for this row. |
predictionExplanations |
[PredictionExplanation] |
true |
|
A list of prediction explanations. |
predictionThreshold |
number,null |
true |
|
The threshold value used for classification prediction. |
predictionValues |
[PredictionExplanationsPredictionValues] |
true |
|
A list of prediction values. |
rowId |
integer |
true |
|
Which row this PredictionExplanationsRow describes. |
seriesId |
string,null |
true |
|
The ID of the series value for the row in a multiseries project. For a single series project this will be null. For time series projects only. |
timestamp |
string |
true |
|
Timestamp for the row. For time series projects only. |
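Rows nest their explanations, so exporting them usually means flattening to one record per (row, explanation) pair. A standard-library sketch that keeps the time series columns when present; the column selection is illustrative:

import csv

def rows_to_csv(rows, path):
    # rows is a list of PredictionExplanationsRow objects as documented above.
    fields = ["rowId", "prediction", "timestamp", "seriesId",
              "feature", "featureValue", "strength", "qualitativateStrength"]
    with open(path, "w", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=fields)
        writer.writeheader()
        for row in rows:
            for expl in row["predictionExplanations"]:
                writer.writerow({
                    "rowId": row["rowId"],
                    "prediction": row["prediction"],
                    "timestamp": row.get("timestamp"),   # time series projects only
                    "seriesId": row.get("seriesId"),
                    "feature": expl["feature"],
                    "featureValue": expl["featureValue"],
                    "strength": expl["strength"],
                    "qualitativateStrength": expl["qualitativateStrength"],
                })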
PredictionFileUpload
{
"properties": {
"actualValueColumn": {
"description": "Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. ",
"type": "string",
"x-versionadded": "v2.21"
},
"credentials": {
"description": "A list of credentials for the secondary datasets used in feature discovery project",
"type": "string",
"x-versionadded": "v2.19"
},
"file": {
"description": "The dataset file to upload for prediction.",
"format": "binary",
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.8"
},
"predictionsEndDate": {
"description": "Used for time series projects only. The end date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for time series projects only. The start date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "A boolean flag. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. If omitted or false, missing values are not allowed. For time series projects only.",
"enum": [
"false",
"False",
"true",
"True"
],
"type": "string",
"x-versionadded": "v2.15"
},
"secondaryDatasetsConfigId": {
"description": "Optional, for feature discovery projects only. The Id of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
}
},
"required": [
"file"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string |
false |
|
Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered a bulk predictions dataset. |
credentials |
string |
false |
|
A list of credentials for the secondary datasets used in feature discovery project |
file |
string(binary) |
true |
|
The dataset file to upload for prediction. |
forecastPoint |
string(date-time) |
false |
|
For time series projects only. The time in the dataset relative to which predictions are generated. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error. |
predictionsEndDate |
string(date-time) |
false |
|
Used for time series projects only. The end date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsStartDate , and cannot be provided with the forecastPoint parameter. |
predictionsStartDate |
string(date-time) |
false |
|
Used for time series projects only. The start date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsEndDate , and cannot be provided with the forecastPoint parameter. |
relaxKnownInAdvanceFeaturesCheck |
string |
false |
|
A boolean flag. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. If omitted or false, missing values are not allowed. For time series projects only. |
secondaryDatasetsConfigId |
string |
false |
|
Optional, for feature discovery projects only. The Id of the alternative secondary dataset config to use during prediction. |
Enumerated Values
Property |
Value |
relaxKnownInAdvanceFeaturesCheck |
[false , False , true , True ] |
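Because this payload is uploaded as a multipart form, relaxKnownInAdvanceFeaturesCheck travels as the string 'true'/'false' rather than a JSON boolean, as the enumerated values above show. A sketch of the request; the upload path, host, and headers are assumptions, since this section only documents the form fields:

import requests

def upload_prediction_file(project_id, csv_path, forecast_point=None,
                           api="https://app.example.com/api/v2", headers=None):
    # Assumed upload route; only the form fields below come from the schema.
    data = {}
    if forecast_point is not None:
        data["forecastPoint"] = forecast_point              # time series projects only
        data["relaxKnownInAdvanceFeaturesCheck"] = "true"   # string enum per the schema
    with open(csv_path, "rb") as fh:
        resp = requests.post(
            f"{api}/projects/{project_id}/predictionDatasets/fileUploads/",
            data=data, files={"file": fh}, headers=headers or {})
    resp.raise_for_status()
    return resp.json()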
PredictionFromCatalogDataset
{
"properties": {
"actualValueColumn": {
"description": "Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset.",
"type": "string",
"x-versionadded": "v2.21"
},
"credentialData": {
"description": "The credentials to authenticate with the database, to be used instead of credential ID.",
"oneOf": [
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'basic' here.",
"enum": [
"basic"
],
"type": "string"
},
"password": {
"description": "The password for database authentication. The password is encrypted at rest and never saved / stored.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"credentialType",
"password",
"user"
],
"type": "object"
},
{
"properties": {
"awsAccessKeyId": {
"description": "The S3 AWS access key ID. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSecretAccessKey": {
"description": "The S3 AWS secret access key. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSessionToken": {
"default": null,
"description": "The S3 AWS session token for AWS temporary credentials.Cannot include this parameter if configId is specified.",
"type": [
"string",
"null"
]
},
"configId": {
"description": "ID of Secure configurations of credentials shared by admin.If specified, cannot include awsAccessKeyId, awsSecretAccessKey or awsSessionToken",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 's3' here.",
"enum": [
"s3"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'oauth' here.",
"enum": [
"oauth"
],
"type": "string"
},
"oauthAccessToken": {
"default": null,
"description": "The oauth access token.",
"type": [
"string",
"null"
]
},
"oauthClientId": {
"default": null,
"description": "The oauth client ID.",
"type": [
"string",
"null"
]
},
"oauthClientSecret": {
"default": null,
"description": "The oauth client secret.",
"type": [
"string",
"null"
]
},
"oauthRefreshToken": {
"description": "The oauth refresh token.",
"type": "string"
}
},
"required": [
"credentialType",
"oauthRefreshToken"
],
"type": "object"
},
{
"properties": {
"configId": {
"description": "The ID of the saved shared credentials. If specified, cannot include user, privateKeyStr or passphrase.",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 'snowflake_key_pair_user_account' here.",
"enum": [
"snowflake_key_pair_user_account"
],
"type": "string"
},
"passphrase": {
"description": "Optional passphrase to decrypt private key. Cannot include this parameter if configId is specified.",
"type": "string"
},
"privateKeyStr": {
"description": "Private key for key pair authentication. Required if configId is not specified. Cannot include this parameter if configId is specified.",
"type": "string"
},
"user": {
"description": "Username for this credential. Required if configId is not specified. Cannot include this parameter if configId is specified.",
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"configId": {
"description": "ID of Secure configurations shared by admin.Alternative to googleConfigId (deprecated). If specified, cannot include gcpKey.",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 'gcp' here.",
"enum": [
"gcp"
],
"type": "string"
},
"gcpKey": {
"description": "The Google Cloud Platform (GCP) key. Output is the downloaded JSON resulting from creating a service account *User Managed Key* (in the *IAM & admin > Service accounts section* of GCP).Required if googleConfigId/configId is not specified.Cannot include this parameter if googleConfigId/configId is specified.",
"properties": {
"authProviderX509CertUrl": {
"description": "Auth provider X509 certificate URL.",
"format": "uri",
"type": "string"
},
"authUri": {
"description": "Auth URI.",
"format": "uri",
"type": "string"
},
"clientEmail": {
"description": "Client email address.",
"type": "string"
},
"clientId": {
"description": "Client ID.",
"type": "string"
},
"clientX509CertUrl": {
"description": "Client X509 certificate URL.",
"format": "uri",
"type": "string"
},
"privateKey": {
"description": "Private key.",
"type": "string"
},
"privateKeyId": {
"description": "Private key ID",
"type": "string"
},
"projectId": {
"description": "Project ID.",
"type": "string"
},
"tokenUri": {
"description": "Token URI.",
"format": "uri",
"type": "string"
},
"type": {
"description": "GCP account type.",
"enum": [
"service_account"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"googleConfigId": {
"description": "ID of Secure configurations shared by admin. This is deprecated.Please use configId instead. If specified, cannot include gcpKey.",
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
},
{
"properties": {
"credentialType": {
"description": "The type of these credentials, 'databricks_access_token_account' here.",
"enum": [
"databricks_access_token_account"
],
"type": "string"
},
"databricksAccessToken": {
"description": "Databricks personal access token.",
"minLength": 1,
"type": "string"
}
},
"required": [
"credentialType",
"databricksAccessToken"
],
"type": "object"
},
{
"properties": {
"azureTenantId": {
"description": "Tenant ID of the Azure AD service principal.",
"type": "string"
},
"clientId": {
"description": "Client ID of the Azure AD service principal.",
"type": "string"
},
"clientSecret": {
"description": "Client Secret of the Azure AD service principal.",
"type": "string"
},
"configId": {
"description": "ID of secure configurations of credentials shared by admin.",
"type": "string",
"x-versionadded": "v2.35"
},
"credentialType": {
"description": "The type of these credentials, 'azure_service_principal' here.",
"enum": [
"azure_service_principal"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
}
],
"x-versionadded": "v2.23"
},
"credentialId": {
"description": "The ID of the set of credentials to authenticate with the database.",
"type": "string",
"x-versionadded": "v2.19"
},
"credentials": {
"description": "List of credentials for the secondary datasets used in feature discovery project.",
"items": {
"oneOf": [
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
},
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
]
},
"maxItems": 30,
"type": "array",
"x-versionadded": "v2.19"
},
"datasetId": {
"description": "The ID of the dataset entry to use for prediction dataset.",
"type": "string"
},
"datasetVersionId": {
"description": "The ID of the dataset version to use for the prediction dataset. If not specified - uses latest version associated with datasetId.",
"type": "string"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.8"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.DEPRECATED: please use credentialId or credentialData instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
},
"predictionsEndDate": {
"description": "The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "For time series projects only. If True, missing values in the known in advance features are allowed in the forecast window at the prediction time. If omitted or False, missing values are not allowed.",
"type": "boolean"
},
"secondaryDatasetsConfigId": {
"description": "For feature discovery projects only. The Id of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
},
"useKerberos": {
"default": false,
"description": "If true, use kerberos authentication for database authentication. Default is false.",
"type": "boolean",
"x-versionadded": "v2.19"
},
"user": {
"description": "The username for database authentication. DEPRECATED: please use credentialId or credentialData instead.",
"type": "string",
"x-versiondeprecated": "v2.23"
}
},
"required": [
"datasetId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string |
false |
|
Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. |
credentialData |
any |
false |
|
The credentials to authenticate with the database, to be used instead of credential ID. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
BasicCredentials |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
S3Credentials |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
OAuthCredentials |
false |
|
none |
xor
xor
xor
xor
continued
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string |
false |
|
The ID of the set of credentials to authenticate with the database. |
credentials |
[oneOf] |
false |
maxItems: 30
|
List of credentials for the secondary datasets used in a feature discovery project. |
oneOf
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
CredentialId |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
datasetId |
string |
true |
|
The ID of the dataset entry to use for prediction dataset. |
datasetVersionId |
string |
false |
|
The ID of the dataset version to use for the prediction dataset. If not specified, the latest version associated with datasetId is used. |
forecastPoint |
string(date-time) |
false |
|
For time series projects only. The time in the dataset relative to which predictions are generated. This value is optional. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error. |
password |
string |
false |
|
The password (in cleartext) for database authentication. The password will be encrypted on the server side within the scope of the HTTP request and never saved or stored. DEPRECATED: please use credentialId or credentialData instead. |
predictionsEndDate |
string(date-time) |
false |
|
The end date for bulk predictions, exclusive. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsStartDate , and cannot be provided with the forecastPoint parameter. |
predictionsStartDate |
string(date-time) |
false |
|
The start date for bulk predictions. Used for time series projects only. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsEndDate , and cannot be provided with the forecastPoint parameter. |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
For time series projects only. If True, missing values in the known in advance features are allowed in the forecast window at the prediction time. If omitted or False, missing values are not allowed. |
secondaryDatasetsConfigId |
string |
false |
|
For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction. |
useKerberos |
boolean |
false |
|
If true, Kerberos authentication is used for the database connection. Default is false. |
user |
string |
false |
|
The username for database authentication. DEPRECATED: please use credentialId or credentialData instead. |
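To make the schema above concrete, here is a minimal, hypothetical request body that scores a registered dataset using a stored credential instead of the deprecated user/password pair. All IDs below are placeholders.

{
  "datasetId": "65a0f0e1c2d3b4a596870001",
  "datasetVersionId": "65a0f0e1c2d3b4a596870002",
  "credentialId": "65a0f0e1c2d3b4a596870003"
}

Only datasetId is required; if datasetVersionId is omitted, the latest version associated with datasetId is used.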
PredictionObject
{
"properties": {
"actualValue": {
"description": "In the case of an unsupervised time series project with a dataset using ``predictionsStartDate`` and ``predictionsEndDate`` for bulk predictions and a specified actual value column, the predictions will be a json array in the same format as with a forecast point with one additional element - `actualValues`. It is the actual value in the row.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"forecastDistance": {
"description": "(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column.",
"type": [
"integer",
"null"
]
},
"forecastPoint": {
"description": "(if time series project) The forecastPoint of the predictions. Either provided or inferred.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"originalFormatTimestamp": {
"description": "The timestamp of this row in the prediction dataset. Unlike the ``timestamp`` field, this field will keep the same DateTime formatting as the uploaded prediction dataset. (This column is shown if enabled by your administrator.)",
"type": "string",
"x-versionadded": "v2.17"
},
"positiveProbability": {
"description": "For binary classification, the probability the row belongs to the positive class.",
"minimum": 0,
"type": [
"number",
"null"
]
},
"prediction": {
"description": "The prediction of the model.",
"oneOf": [
{
"description": "If using a regressor model, will be the numeric value of the target.",
"type": "number"
},
{
"description": "If using a binary or muliclass classifier model, will be the predicted class.",
"type": "string"
},
{
"description": "If using a multilabel classifier model, will be a list of predicted classes.",
"items": {
"type": "string"
},
"type": "array"
}
]
},
"predictionExplanationMetadata": {
"description": "Array containing algorithm-specific values. Varies depending on the value of `explanationAlgorithm`.",
"items": {
"description": "Prediction explanation metadata.",
"properties": {
"shapRemainingTotal": {
"description": "Will be present only if `explanationAlgorithm` = 'shap' and `maxExplanations` is nonzero. The total of SHAP values for features beyond the `maxExplanations`. This can be identically 0 in all rows, if `maxExplanations` is greater than the number of features and thus all features are returned.",
"type": "integer"
}
},
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionExplanations": {
"description": "Array contains `predictionExplanation` objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if `explanationAlgorithm` is not null (prediction explanations were requested).",
"items": {
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionIntervalLowerBound": {
"description": "Present if ``includePredictionIntervals`` is True. Indicates a lower bound of the estimate of error based on test data.",
"type": "number",
"x-versionadded": "v2.16"
},
"predictionIntervalUpperBound": {
"description": "Present if ``includePredictionIntervals`` is True. Indicates an upper bound of the estimate of error based on test data.",
"type": "number",
"x-versionadded": "v2.16"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionValues": {
"description": "A list of predicted values for this row.",
"items": {
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "The row in the prediction dataset this prediction corresponds to.",
"minimum": 0,
"type": "integer"
},
"segmentId": {
"description": "The ID of the segment value for a segmented project.",
"type": "string",
"x-versionadded": "v2.27"
},
"seriesId": {
"description": "The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN.",
"type": [
"string",
"null"
]
},
"target": {
"description": "In the case of a time series project with a dataset using predictionsStartDate and predictionsEndDate for bulk predictions, the predictions will be a json array in the same format as with a forecast point with one additional element - `target`. It is the target value in the row.",
"type": [
"string",
"null"
]
},
"timestamp": {
"description": "(if time series project) The timestamp of this row in the prediction dataset.",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"prediction",
"rowId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValue |
string,null |
false |
|
In the case of an unsupervised time series project with a dataset using predictionsStartDate and predictionsEndDate for bulk predictions and a specified actual value column, the predictions will be a json array in the same format as with a forecast point with one additional element - actualValues . It is the actual value in the row. |
forecastDistance |
integer,null |
false |
|
(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column. |
forecastPoint |
string,null(date-time) |
false |
|
(if time series project) The forecastPoint of the predictions. Either provided or inferred. |
originalFormatTimestamp |
string |
false |
|
The timestamp of this row in the prediction dataset. Unlike the timestamp field, this field will keep the same DateTime formatting as the uploaded prediction dataset. (This column is shown if enabled by your administrator.) |
positiveProbability |
number,null |
false |
minimum: 0
|
For binary classification, the probability the row belongs to the positive class. |
prediction |
any |
true |
|
The prediction of the model. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
number |
false |
|
If using a regressor model, will be the numeric value of the target. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
If using a binary or multiclass classifier model, will be the predicted class. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
[string] |
false |
|
If using a multilabel classifier model, will be a list of predicted classes. |
continued
Name |
Type |
Required |
Restrictions |
Description |
predictionExplanationMetadata |
[PredictionExplanationsMetadataValues] |
false |
|
Array containing algorithm-specific values. Varies depending on the value of explanationAlgorithm . |
predictionExplanations |
[PredictionExplanationsObject] |
false |
|
Array contains predictionExplanation objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if explanationAlgorithm is not null (prediction explanations were requested). |
predictionIntervalLowerBound |
number |
false |
|
Present if includePredictionIntervals is True. Indicates a lower bound of the estimate of error based on test data. |
predictionIntervalUpperBound |
number |
false |
|
Present if includePredictionIntervals is True. Indicates an upper bound of the estimate of error based on test data. |
predictionThreshold |
number |
false |
maximum: 1 minimum: 0
|
Threshold used for binary classification in predictions. |
predictionValues |
[PredictionArrayObjectValues] |
false |
|
A list of predicted values for this row. |
rowId |
integer |
true |
minimum: 0
|
The row in the prediction dataset this prediction corresponds to. |
segmentId |
string |
false |
|
The ID of the segment value for a segmented project. |
seriesId |
string,null |
false |
|
The ID of the series value for a multiseries project. For time series projects that are not multiseries, this will be NaN. |
target |
string,null |
false |
|
In the case of a time series project with a dataset using predictionsStartDate and predictionsEndDate for bulk predictions, the predictions will be a json array in the same format as with a forecast point with one additional element - target . It is the target value in the row. |
timestamp |
string,null(date-time) |
false |
|
(if time series project) The timestamp of this row in the prediction dataset. |
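As an illustration only, a single binary-classification row conforming to PredictionObject might look like the following; the class labels, probabilities, and threshold are invented placeholders.

{
  "rowId": 0,
  "prediction": "yes",
  "positiveProbability": 0.87,
  "predictionThreshold": 0.5,
  "predictionValues": [
    {
      "label": "yes",
      "value": 0.87
    },
    {
      "label": "no",
      "value": 0.13
    }
  ]
}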
PredictionRetrieveResponse
{
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Will be present only if the prediction dataset has an actual value column. The name of the column with actuals that was used to calculate the scores and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is 'shap', which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used during feature derivation in feature derivation window.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"includesPredictionIntervals": {
"description": "For time series projects only. Indicates if prediction intervals will be part of the response. Defaults to False.",
"type": "boolean",
"x-versionadded": "v2.16"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates 'no limit'. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"positiveClass": {
"description": "For binary classification, the class of the target deemed the positive class. For all other project types this field will be null.",
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
}
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. Will be present only if `includePredictionIntervals` is True. Indicates the percentile used for prediction intervals calculation. Defaults to 80.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.16"
},
"predictions": {
"description": "The json array of predictions. The predictions in the response will have slightly different formats, depending on the project type.",
"items": {
"properties": {
"actualValue": {
"description": "In the case of an unsupervised time series project with a dataset using ``predictionsStartDate`` and ``predictionsEndDate`` for bulk predictions and a specified actual value column, the predictions will be a json array in the same format as with a forecast point with one additional element - `actualValues`. It is the actual value in the row.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"forecastDistance": {
"description": "(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column.",
"type": [
"integer",
"null"
]
},
"forecastPoint": {
"description": "(if time series project) The forecastPoint of the predictions. Either provided or inferred.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"originalFormatTimestamp": {
"description": "The timestamp of this row in the prediction dataset. Unlike the ``timestamp`` field, this field will keep the same DateTime formatting as the uploaded prediction dataset. (This column is shown if enabled by your administrator.)",
"type": "string",
"x-versionadded": "v2.17"
},
"positiveProbability": {
"description": "For binary classification, the probability the row belongs to the positive class.",
"minimum": 0,
"type": [
"number",
"null"
]
},
"prediction": {
"description": "The prediction of the model.",
"oneOf": [
{
"description": "If using a regressor model, will be the numeric value of the target.",
"type": "number"
},
{
"description": "If using a binary or muliclass classifier model, will be the predicted class.",
"type": "string"
},
{
"description": "If using a multilabel classifier model, will be a list of predicted classes.",
"items": {
"type": "string"
},
"type": "array"
}
]
},
"predictionExplanationMetadata": {
"description": "Array containing algorithm-specific values. Varies depending on the value of `explanationAlgorithm`.",
"items": {
"description": "Prediction explanation metadata.",
"properties": {
"shapRemainingTotal": {
"description": "Will be present only if `explanationAlgorithm` = 'shap' and `maxExplanations` is nonzero. The total of SHAP values for features beyond the `maxExplanations`. This can be identically 0 in all rows, if `maxExplanations` is greater than the number of features and thus all features are returned.",
"type": "integer"
}
},
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionExplanations": {
"description": "Array contains `predictionExplanation` objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if `explanationAlgorithm` is not null (prediction explanations were requested).",
"items": {
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionIntervalLowerBound": {
"description": "Present if ``includePredictionIntervals`` is True. Indicates a lower bound of the estimate of error based on test data.",
"type": "number",
"x-versionadded": "v2.16"
},
"predictionIntervalUpperBound": {
"description": "Present if ``includePredictionIntervals`` is True. Indicates an upper bound of the estimate of error based on test data.",
"type": "number",
"x-versionadded": "v2.16"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionValues": {
"description": "A list of predicted values for this row.",
"items": {
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "The row in the prediction dataset this prediction corresponds to.",
"minimum": 0,
"type": "integer"
},
"segmentId": {
"description": "The ID of the segment value for a segmented project.",
"type": "string",
"x-versionadded": "v2.27"
},
"seriesId": {
"description": "The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN.",
"type": [
"string",
"null"
]
},
"target": {
"description": "In the case of a time series project with a dataset using predictionsStartDate and predictionsEndDate for bulk predictions, the predictions will be a json array in the same format as with a forecast point with one additional element - `target`. It is the target value in the row.",
"type": [
"string",
"null"
]
},
"timestamp": {
"description": "(if time series project) The timestamp of this row in the prediction dataset.",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"prediction",
"rowId"
],
"type": "object"
},
"type": "array"
},
"shapBaseValue": {
"description": "Will be present only if `explanationAlgorithm` = 'shap'. The model's average prediction over the training data. SHAP values are deviations from the base value.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.21"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"items": {
"description": "Mismatch information",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"task": {
"description": "The prediction task.",
"enum": [
"Regression",
"Binary",
"Multiclass",
"Multilabel"
],
"type": "string"
}
},
"required": [
"positiveClass",
"predictions",
"task"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string,null |
false |
|
For time series unsupervised projects only. Will be present only if the prediction dataset has an actual value column. The name of the column with actuals that was used to calculate the scores and insights. |
explanationAlgorithm |
string,null |
false |
|
The selected algorithm to use for prediction explanations. At present, the only acceptable value is 'shap', which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations). |
featureDerivationWindowCounts |
integer,null |
false |
|
For time series projects with partial history only. Indicates how many points were used during feature derivation in the feature derivation window. |
includesPredictionIntervals |
boolean |
false |
|
For time series projects only. Indicates if prediction intervals will be part of the response. Defaults to False. |
maxExplanations |
integer,null |
false |
|
The maximum number of prediction explanations values to be returned with each row in the predictions json array. Null indicates 'no limit'. Will be present only if explanationAlgorithm was set. |
positiveClass |
any |
true |
|
For binary classification, the class of the target deemed the positive class. For all other project types this field will be null. |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
integer |
false |
|
none |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
number |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
predictionIntervalsSize |
integer,null |
false |
|
For time series projects only. Will be present only if includePredictionIntervals is True. Indicates the percentile used for prediction intervals calculation. Defaults to 80. |
predictions |
[PredictionObject] |
true |
|
The json array of predictions. The predictions in the response will have slightly different formats, depending on the project type. |
shapBaseValue |
number,null |
false |
|
Will be present only if explanationAlgorithm = 'shap'. The model's average prediction over the training data. SHAP values are deviations from the base value. |
shapWarnings |
[ShapWarningValues] |
false |
|
Will be present if explanationAlgorithm was set to shap and there were additivity failures during SHAP values calculation. |
task |
string |
true |
|
The prediction task. |
Enumerated Values
Property |
Value |
task |
[Regression , Binary , Multiclass , Multilabel ] |
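Sketched under the same assumptions as the PredictionObject example above, a heavily truncated PredictionRetrieveResponse for a binary classification project could look like this; only the required top-level fields and one prediction row are shown, and all values are placeholders.

{
  "task": "Binary",
  "positiveClass": "yes",
  "includesPredictionIntervals": false,
  "predictions": [
    {
      "rowId": 0,
      "prediction": "yes",
      "positiveProbability": 0.87
    }
  ]
}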
PredictionURLUpload
{
"properties": {
"actualValueColumn": {
"description": "Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. This value is optional.",
"type": "string",
"x-versionadded": "v2.21"
},
"credentials": {
"description": "A list of credentials for the secondary datasets used in feature discovery project",
"items": {
"oneOf": [
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"password": {
"description": "The password (in cleartext) for database authentication. The password will be encrypted on the server side in scope of HTTP request and never saved or stored.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
},
"user": {
"description": "The username for database authentication.",
"type": "string"
}
},
"required": [
"password",
"user"
],
"type": "object"
},
{
"properties": {
"catalogVersionId": {
"description": "The ID of the latest version of the catalog entry.",
"type": "string"
},
"credentialId": {
"description": "The ID of the set of credentials to use instead of user and password. Note that with this change, username and password will become optional.",
"type": "string"
},
"url": {
"description": "The link to retrieve more detailed information about the entity that uses this catalog dataset.",
"type": "string"
}
},
"required": [
"credentialId"
],
"type": "object"
}
]
},
"maxItems": 30,
"type": "array",
"x-versionadded": "v2.19"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions are generated. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error.",
"format": "date-time",
"type": "string",
"x-versionadded": "v2.8"
},
"predictionsEndDate": {
"description": "Used for time series projects only. The end date for bulk predictions, exclusive. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsStartDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"predictionsStartDate": {
"description": "Used for time series projects only. The start date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a ``predictionsEndDate``, and cannot be provided with the ``forecastPoint`` parameter.",
"format": "date-time",
"type": "string"
},
"relaxKnownInAdvanceFeaturesCheck": {
"description": "For time series projects only. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. This value is optional. If omitted or false, missing values are not allowed.",
"type": "boolean",
"x-versionadded": "v2.15"
},
"secondaryDatasetsConfigId": {
"description": "For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction.",
"type": "string",
"x-versionadded": "v2.19"
},
"url": {
"description": "The URL to download the dataset from.",
"format": "url",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string |
false |
|
Actual value column name, valid for the prediction files if the project is unsupervised and the dataset is considered as bulk predictions dataset. This value is optional. |
credentials |
[oneOf] |
false |
maxItems: 30
|
A list of credentials for the secondary datasets used in a feature discovery project. |
oneOf
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
CredentialId |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
forecastPoint |
string(date-time) |
false |
|
For time series projects only. The time in the dataset relative to which predictions are generated. If not specified the default value is the value in the row with the latest specified timestamp. Specifying this value for a project that is not a time series project will result in an error. |
predictionsEndDate |
string(date-time) |
false |
|
Used for time series projects only. The end date for bulk predictions, exclusive. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsStartDate , and cannot be provided with the forecastPoint parameter. |
predictionsStartDate |
string(date-time) |
false |
|
Used for time series projects only. The start date for bulk predictions. Note that this parameter is used for generating historical predictions using the training data, not for future predictions. If not specified, the dataset is not considered as a bulk predictions dataset. This parameter should be provided in conjunction with a predictionsEndDate , and cannot be provided with the forecastPoint parameter. |
relaxKnownInAdvanceFeaturesCheck |
boolean |
false |
|
For time series projects only. If true, missing values in the known in advance features are allowed in the forecast window at the prediction time. This value is optional. If omitted or false, missing values are not allowed. |
secondaryDatasetsConfigId |
string |
false |
|
For feature discovery projects only. The ID of the alternative secondary dataset config to use during prediction. |
url |
string(url) |
true |
|
The URL to download the dataset from. |
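For example, the smallest valid PredictionURLUpload body only needs url; the address below is a placeholder, and actualValueColumn is shown purely for illustration (it applies only to unsupervised bulk prediction datasets).

{
  "url": "https://example.com/data/prediction_dataset.csv",
  "actualValueColumn": "observed_value"
}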
{
"properties": {
"count": {
"description": "The number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "An array of the metadata records.",
"items": {
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Actual value column can be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "Deprecated alias for `predictionDatasetId`.",
"type": [
"string",
"null"
]
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is `shap`, which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used in during feature derivation.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions were generated.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"id": {
"description": "The id of the prediction record.",
"type": "string"
},
"includesPredictionIntervals": {
"description": "Whether the predictions include prediction intervals.",
"type": "boolean"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates `no limit`. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model id used for predictions.",
"type": "string"
},
"predictionDatasetId": {
"description": "The dataset id where the prediction data comes from. The field is available via `/api/v2/projects/<projectId>/predictionsMetadata/` route and replaced on `datasetId`in deprecated `/api/v2/projects/<projectId>/predictions/` endpoint.",
"type": [
"string",
"null"
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. If prediction intervals were computed, what percentile they represent. Will be ``None`` if ``includePredictionIntervals`` is ``False``.",
"type": [
"integer",
"null"
]
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "For time series projects only. The end date for bulk predictions, exclusive. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "For time series projects only. The start date for bulk predictions. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"projectId": {
"description": "The project id of the predictions.",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number",
"x-versionadded": "v2.21"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer",
"x-versionadded": "v2.21"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"url": {
"description": "The url at which you can download the predictions.",
"type": "string"
}
},
"required": [
"id",
"includesPredictionIntervals",
"modelId",
"predictionIntervalsSize",
"projectId",
"url"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"count",
"data",
"next",
"previous"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
count |
integer |
true |
|
The number of items returned on this page. |
data |
[RetrievePredictionMetadataObject] |
true |
|
An array of the metadata records. |
next |
string,null(uri) |
true |
|
URL pointing to the next page (if null, there is no next page). |
previous |
string,null(uri) |
true |
|
URL pointing to the previous page (if null, there is no previous page). |
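To make the pagination contract concrete, a single-record listing might look roughly like the following. Every ID is a placeholder, and the download URL is an invented stand-in rather than a real route.

{
  "count": 1,
  "next": null,
  "previous": null,
  "data": [
    {
      "id": "65a0f0e1c2d3b4a596870010",
      "projectId": "65a0f0e1c2d3b4a596870011",
      "modelId": "65a0f0e1c2d3b4a596870012",
      "predictionDatasetId": "65a0f0e1c2d3b4a596870013",
      "includesPredictionIntervals": false,
      "predictionIntervalsSize": null,
      "url": "https://app.example.com/placeholder/prediction-download-url/"
    }
  ]
}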
{
"properties": {
"actualValueColumn": {
"description": "For time series unsupervised projects only. Actual value column can be used to calculate the classification metrics and insights.",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"datasetId": {
"description": "Deprecated alias for `predictionDatasetId`.",
"type": [
"string",
"null"
]
},
"explanationAlgorithm": {
"description": "The selected algorithm to use for prediction explanations. At present, the only acceptable value is `shap`, which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations).",
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"featureDerivationWindowCounts": {
"description": "For time series projects with partial history only. Indicates how many points were used in during feature derivation.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.24"
},
"forecastPoint": {
"description": "For time series projects only. The time in the dataset relative to which predictions were generated.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"id": {
"description": "The id of the prediction record.",
"type": "string"
},
"includesPredictionIntervals": {
"description": "Whether the predictions include prediction intervals.",
"type": "boolean"
},
"maxExplanations": {
"description": "The maximum number of prediction explanations values to be returned with each row in the `predictions` json array. Null indicates `no limit`. Will be present only if `explanationAlgorithm` was set.",
"type": [
"integer",
"null"
],
"x-versionadded": "v2.21"
},
"modelId": {
"description": "The model id used for predictions.",
"type": "string"
},
"predictionDatasetId": {
"description": "The dataset id where the prediction data comes from. The field is available via `/api/v2/projects/<projectId>/predictionsMetadata/` route and replaced on `datasetId`in deprecated `/api/v2/projects/<projectId>/predictions/` endpoint.",
"type": [
"string",
"null"
]
},
"predictionIntervalsSize": {
"description": "For time series projects only. If prediction intervals were computed, what percentile they represent. Will be ``None`` if ``includePredictionIntervals`` is ``False``.",
"type": [
"integer",
"null"
]
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"type": [
"number",
"null"
],
"x-versionadded": "v2.22"
},
"predictionsEndDate": {
"description": "For time series projects only. The end date for bulk predictions, exclusive. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"predictionsStartDate": {
"description": "For time series projects only. The start date for bulk predictions. Note that this parameter was used for generating historical predictions using the training data, not for future predictions.",
"format": "date-time",
"type": [
"string",
"null"
],
"x-versionadded": "v2.20"
},
"projectId": {
"description": "The project id of the predictions.",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number",
"x-versionadded": "v2.21"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer",
"x-versionadded": "v2.21"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"url": {
"description": "The url at which you can download the predictions.",
"type": "string"
}
},
"required": [
"id",
"includesPredictionIntervals",
"modelId",
"predictionIntervalsSize",
"projectId",
"url"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
actualValueColumn |
string,null |
false |
|
For time series unsupervised projects only. The actual value column can be used to calculate the classification metrics and insights. |
datasetId |
string,null |
false |
|
Deprecated alias for predictionDatasetId . |
explanationAlgorithm |
string,null |
false |
|
The selected algorithm to use for prediction explanations. At present, the only acceptable value is shap , which selects the SHapley Additive exPlanations (SHAP) explainer. Defaults to null (no prediction explanations). |
featureDerivationWindowCounts |
integer,null |
false |
|
For time series projects with partial history only. Indicates how many points were used during feature derivation. |
forecastPoint |
string,null(date-time) |
false |
|
For time series projects only. The time in the dataset relative to which predictions were generated. |
id |
string |
true |
|
The id of the prediction record. |
includesPredictionIntervals |
boolean |
true |
|
Whether the predictions include prediction intervals. |
maxExplanations |
integer,null |
false |
|
The maximum number of prediction explanations values to be returned with each row in the predictions json array. Null indicates no limit . Will be present only if explanationAlgorithm was set. |
modelId |
string |
true |
|
The model id used for predictions. |
predictionDatasetId |
string,null |
false |
|
The dataset id where the prediction data comes from. The field is available via the /api/v2/projects/<projectId>/predictionsMetadata/ route and replaces datasetId in the deprecated /api/v2/projects/<projectId>/predictions/ endpoint. |
predictionIntervalsSize |
integer,null |
true |
|
For time series projects only. If prediction intervals were computed, what percentile they represent. Will be None if includePredictionIntervals is False . |
predictionThreshold |
number,null |
false |
|
Threshold used for binary classification in predictions. |
predictionsEndDate |
string,null(date-time) |
false |
|
For time series projects only. The end date for bulk predictions, exclusive. Note that this parameter was used for generating historical predictions using the training data, not for future predictions. |
predictionsStartDate |
string,null(date-time) |
false |
|
For time series projects only. The start date for bulk predictions. Note that this parameter was used for generating historical predictions using the training data, not for future predictions. |
projectId |
string |
true |
|
The project id of the predictions. |
shapWarnings |
ShapWarnings |
false |
|
Will be present if explanationAlgorithm was set to shap and there were additivity failures during SHAP values calculation. |
url |
string |
true |
|
The url at which you can download the predictions. |
S3Credentials
{
"properties": {
"awsAccessKeyId": {
"description": "The S3 AWS access key ID. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSecretAccessKey": {
"description": "The S3 AWS secret access key. Required if configId is not specified.Cannot include this parameter if configId is specified.",
"type": "string"
},
"awsSessionToken": {
"default": null,
"description": "The S3 AWS session token for AWS temporary credentials.Cannot include this parameter if configId is specified.",
"type": [
"string",
"null"
]
},
"configId": {
"description": "ID of Secure configurations of credentials shared by admin.If specified, cannot include awsAccessKeyId, awsSecretAccessKey or awsSessionToken",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 's3' here.",
"enum": [
"s3"
],
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
awsAccessKeyId |
string |
false |
|
The S3 AWS access key ID. Required if configId is not specified. Cannot include this parameter if configId is specified. |
awsSecretAccessKey |
string |
false |
|
The S3 AWS secret access key. Required if configId is not specified. Cannot include this parameter if configId is specified. |
awsSessionToken |
string,null |
false |
|
The S3 AWS session token for AWS temporary credentials. Cannot include this parameter if configId is specified. |
configId |
string |
false |
|
ID of Secure configurations of credentials shared by admin. If specified, cannot include awsAccessKeyId, awsSecretAccessKey, or awsSessionToken. |
credentialType |
string |
true |
|
The type of these credentials, 's3' here. |
Enumerated Values
Property |
Value |
credentialType |
s3 |
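For reference, an inline S3Credentials object might look like the following; all key material below is fake. Alternatively, only credentialType plus a configId pointing at an admin-shared secure configuration would be supplied, in which case the inline keys must be omitted.

{
  "credentialType": "s3",
  "awsAccessKeyId": "AKIAEXAMPLEKEY123456",
  "awsSecretAccessKey": "examplesecretaccesskeyexamplesecretkey00",
  "awsSessionToken": null
}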
S3DataStreamer
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from Amazon Cloud Storage S3
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
Use the specified credential to access the url |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
endpointUrl |
string(url) |
false |
|
Endpoint URL for the S3 connection (omit to use the default) |
format |
string |
false |
|
Type of input file format |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
format |
[csv , parquet ] |
type |
s3 |
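In responses, the credentialId of an S3 intake may be masked when the caller lacks permission to view it. A sketch of such a record is shown below; the bucket path is a placeholder and the s3:// address style is an assumption based on common usage.

{
  "type": "s3",
  "url": "s3://example-bucket/scoring/input.csv",
  "format": "csv",
  "credentialId": "[redacted]"
}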
S3Intake
{
"description": "Stream CSV data chunks from Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of input file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Stream CSV data chunks from Amazon Cloud Storage S3
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string,null |
false |
|
Use the specified credential to access the url |
endpointUrl |
string(url) |
false |
|
Endpoint URL for the S3 connection (omit to use the default) |
format |
string |
false |
|
Type of input file format |
type |
string |
true |
|
Type name for this intake type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
format |
[csv , parquet ] |
type |
s3 |
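A hypothetical S3Intake block for a Parquet input using a stored credential might look like this; the bucket path and credential ID are placeholders, and the s3:// address style is again an assumption.

{
  "type": "s3",
  "url": "s3://example-bucket/scoring/input.parquet",
  "format": "parquet",
  "credentialId": "65a0f0e1c2d3b4a596870020"
}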
S3Output
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Saves CSV data chunks to Amazon Cloud Storage S3
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
string,null |
false |
|
Use the specified credential to access the url |
endpointUrl |
string(url) |
false |
|
Endpoint URL for the S3 connection (omit to use the default) |
format |
string |
false |
|
Type of output file format |
partitionColumns |
[string] |
false |
maxItems: 100
|
For Parquet directory-scoring only. The column names of the intake data by which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash ("/")). |
serverSideEncryption |
ServerSideEncryption |
false |
|
Configure Server-Side Encryption for S3 output |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
format |
[csv , parquet ] |
type |
s3 |
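As a sketch only, an S3Output writing partitioned Parquet with KMS server-side encryption could look like the following; the bucket, partition column, credential ID, and KMS key ID are placeholders. The trailing slash marks directory scoring, which is why partitionColumns is populated.

{
  "type": "s3",
  "url": "s3://example-bucket/scoring/output/",
  "format": "parquet",
  "partitionColumns": [
    "region"
  ],
  "credentialId": "65a0f0e1c2d3b4a596870021",
  "serverSideEncryption": {
    "algorithm": "aws:kms",
    "kmsKeyId": "1234abcd-12ab-34cd-56ef-1234567890ab"
  }
}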
S3OutputAdaptor
{
"description": "Saves CSV data chunks to Amazon Cloud Storage S3",
"properties": {
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "Use the specified credential to access the url",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"endpointUrl": {
"description": "Endpoint URL for the S3 connection (omit to use the default)",
"format": "url",
"type": "string",
"x-versionadded": "v2.29"
},
"format": {
"default": "csv",
"description": "Type of output file format",
"enum": [
"csv",
"parquet"
],
"type": "string",
"x-versionadded": "v2.25"
},
"partitionColumns": {
"description": "For Parquet directory-scoring only. The column names of the intake data of which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash (\"/\").",
"items": {
"type": "string"
},
"maxItems": 100,
"type": "array",
"x-versionadded": "v2.26"
},
"serverSideEncryption": {
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
},
"type": {
"description": "Type name for this output type",
"enum": [
"s3"
],
"type": "string"
},
"url": {
"description": "URL for the CSV file",
"format": "url",
"type": "string"
}
},
"required": [
"type",
"url"
],
"type": "object"
}
Saves CSV data chunks to Amazon Cloud Storage S3
Properties
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
Use the specified credential to access the url |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
endpointUrl |
string(url) |
false |
|
Endpoint URL for the S3 connection (omit to use the default) |
format |
string |
false |
|
Type of output file format |
partitionColumns |
[string] |
false |
maxItems: 100
|
For Parquet directory-scoring only. The column names of the intake data by which to partition the dataset. Columns are partitioned in the order they are given. At least one value is required if scoring to a directory (meaning the output url ends with a slash ("/")). |
serverSideEncryption |
ServerSideEncryption |
false |
|
Configure Server-Side Encryption for S3 output |
type |
string |
true |
|
Type name for this output type |
url |
string(url) |
true |
|
URL for the CSV file |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
format |
[csv , parquet ] |
type |
s3 |
Schedule
{
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
}
The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.
Properties
Name |
Type |
Required |
Restrictions |
Description |
dayOfMonth |
[number,string] |
true |
maxItems: 31
|
The date(s) of the month that the job will run. Allowed values are either [1 ... 31] or ["*"] for all days of the month. This field is additive with dayOfWeek , meaning the job will run both on the date(s) defined in this field and the day specified by dayOfWeek (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If dayOfMonth is set to ["*"] and dayOfWeek is defined, the scheduler will trigger on every day of the month that matches dayOfWeek (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored. |
dayOfWeek |
[number,string] |
true |
maxItems: 7
|
The day(s) of the week that the job will run. Allowed values are [0 .. 6] , where (Sunday=0), or ["*"] , for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., "sunday", "Sunday", "sun", or "Sun", all map to [0] ). This field is additive with dayOfMonth , meaning the job will run both on the date specified by dayOfMonth and the day defined in this field. |
hour |
[number,string] |
true |
maxItems: 24
|
The hour(s) of the day that the job will run. Allowed values are either ["*"] meaning every hour of the day or [0 ... 23] . |
minute |
[number,string] |
true |
maxItems: 60
|
The minute(s) of the day that the job will run. Allowed values are either ["*"] meaning every minute of the day or [0 ... 59] . |
month |
[number,string] |
true |
maxItems: 12
|
The month(s) of the year that the job will run. Allowed values are either [1 ... 12] or ["*"] for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., "jan" or "october"). Months that are not compatible with dayOfMonth are ignored, for example {"dayOfMonth": [31], "month":["feb"]} . |
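As an illustration, a Schedule object that runs a job every day at 02:30 could be expressed as follows; the times chosen here are arbitrary examples.
{
  "minute": [30],
  "hour": [2],
  "dayOfMonth": ["*"],
  "dayOfWeek": ["*"],
  "month": ["*"]
}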
ScheduledJobResponse
{
"properties": {
"createdBy": {
"description": "User name of the creator",
"type": [
"string",
"null"
]
},
"deploymentId": {
"description": "ID of the deployment this scheduled job is created from.",
"type": [
"string",
"null"
]
},
"enabled": {
"description": "True if the job is enabled and false if the job is disabled.",
"type": "boolean"
},
"id": {
"description": "ID of scheduled prediction job",
"type": "string"
},
"name": {
"description": "Name of the scheduled job.",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"scheduledJobId": {
"description": "ID of this scheduled job.",
"type": "string"
},
"status": {
"description": "Object containing status information about the scheduled job.",
"properties": {
"lastFailedRun": {
"description": "Date and time of the last failed run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRun": {
"description": "Date and time of the last successful run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"nextRunTime": {
"description": "Date and time of the next run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"queuePosition": {
"description": "Position of the job in the queue Job. The value will show 0 if the job is about to run, otherwise, the number will be greater than 0 if currently queued, or None if the job is not currently running.",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"running": {
"description": "`true` or `false` depending on whether the job is currently running.",
"type": "boolean"
}
},
"required": [
"running"
],
"type": "object"
},
"typeId": {
"description": "Job type of the scheduled job",
"type": "string"
},
"updatedAt": {
"description": "Time of last modification",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"enabled",
"id",
"schedule",
"scheduledJobId",
"status",
"typeId"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
createdBy |
string,null |
false |
|
User name of the creator |
deploymentId |
string,null |
false |
|
ID of the deployment this scheduled job is created from. |
enabled |
boolean |
true |
|
True if the job is enabled and false if the job is disabled. |
id |
string |
true |
|
ID of scheduled prediction job |
name |
string,null |
false |
|
Name of the scheduled job. |
schedule |
Schedule |
true |
|
The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False. |
scheduledJobId |
string |
true |
|
ID of this scheduled job. |
status |
ScheduledJobStatus |
true |
|
Object containing status information about the scheduled job. |
typeId |
string |
true |
|
Job type of the scheduled job |
updatedAt |
string,null(date-time) |
false |
|
Time of last modification |
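A hypothetical ScheduledJobResponse, with all IDs, names, and timestamps invented for illustration (the typeId value shown is a placeholder, not a documented constant), might look like this:
{
  "id": "65a1f0c2e4b0a1b2c3d4e5f6",
  "scheduledJobId": "65a1f0c2e4b0a1b2c3d4e5f7",
  "deploymentId": "65a1f0c2e4b0a1b2c3d4e5f8",
  "name": "Nightly scoring",
  "enabled": true,
  "typeId": "exampleJobType",
  "createdBy": "jane.doe",
  "updatedAt": "2024-05-01T02:30:00Z",
  "schedule": {
    "minute": [30],
    "hour": [2],
    "dayOfMonth": ["*"],
    "dayOfWeek": ["*"],
    "month": ["*"]
  },
  "status": {
    "running": false,
    "lastSuccessfulRun": "2024-04-30T02:30:05Z",
    "lastFailedRun": null,
    "nextRunTime": "2024-05-01T02:30:00Z",
    "queuePosition": null
  }
}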
ScheduledJobStatus
{
"description": "Object containing status information about the scheduled job.",
"properties": {
"lastFailedRun": {
"description": "Date and time of the last failed run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRun": {
"description": "Date and time of the last successful run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"nextRunTime": {
"description": "Date and time of the next run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"queuePosition": {
"description": "Position of the job in the queue Job. The value will show 0 if the job is about to run, otherwise, the number will be greater than 0 if currently queued, or None if the job is not currently running.",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"running": {
"description": "`true` or `false` depending on whether the job is currently running.",
"type": "boolean"
}
},
"required": [
"running"
],
"type": "object"
}
Object containing status information about the scheduled job.
Properties
Name |
Type |
Required |
Restrictions |
Description |
lastFailedRun |
string,null(date-time) |
false |
|
Date and time of the last failed run. |
lastSuccessfulRun |
string,null(date-time) |
false |
|
Date and time of the last successful run. |
nextRunTime |
string,null(date-time) |
false |
|
Date and time of the next run. |
queuePosition |
integer,null |
false |
minimum: 0
|
Position of the job in the queue. The value is 0 if the job is about to run, greater than 0 if the job is currently queued, or null if the job is not currently running. |
running |
boolean |
true |
|
true or false depending on whether the job is currently running. |
ScheduledJobsListResponse
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "List of scheduled jobs",
"items": {
"properties": {
"createdBy": {
"description": "User name of the creator",
"type": [
"string",
"null"
]
},
"deploymentId": {
"description": "ID of the deployment this scheduled job is created from.",
"type": [
"string",
"null"
]
},
"enabled": {
"description": "True if the job is enabled and false if the job is disabled.",
"type": "boolean"
},
"id": {
"description": "ID of scheduled prediction job",
"type": "string"
},
"name": {
"description": "Name of the scheduled job.",
"type": [
"string",
"null"
]
},
"schedule": {
"description": "The scheduling information defining how often and when to execute this job to the Job Scheduling service. Optional if enabled = False.",
"properties": {
"dayOfMonth": {
"description": "The date(s) of the month that the job will run. Allowed values are either ``[1 ... 31]`` or ``[\"*\"]`` for all days of the month. This field is additive with ``dayOfWeek``, meaning the job will run both on the date(s) defined in this field and the day specified by ``dayOfWeek`` (for example, dates 1st, 2nd, 3rd, plus every Tuesday). If ``dayOfMonth`` is set to ``[\"*\"]`` and ``dayOfWeek`` is defined, the scheduler will trigger on every day of the month that matches ``dayOfWeek`` (for example, Tuesday the 2nd, 9th, 16th, 23rd, 30th). Invalid dates such as February 31st are ignored.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31
],
"type": [
"number",
"string"
]
},
"maxItems": 31,
"type": "array"
},
"dayOfWeek": {
"description": "The day(s) of the week that the job will run. Allowed values are ``[0 .. 6]``, where (Sunday=0), or ``[\"*\"]``, for all days of the week. Strings, either 3-letter abbreviations or the full name of the day, can be used interchangeably (e.g., \"sunday\", \"Sunday\", \"sun\", or \"Sun\", all map to ``[0]``. This field is additive with ``dayOfMonth``, meaning the job will run both on the date specified by ``dayOfMonth`` and the day defined in this field.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
"sunday",
"SUNDAY",
"Sunday",
"monday",
"MONDAY",
"Monday",
"tuesday",
"TUESDAY",
"Tuesday",
"wednesday",
"WEDNESDAY",
"Wednesday",
"thursday",
"THURSDAY",
"Thursday",
"friday",
"FRIDAY",
"Friday",
"saturday",
"SATURDAY",
"Saturday",
"sun",
"SUN",
"Sun",
"mon",
"MON",
"Mon",
"tue",
"TUE",
"Tue",
"wed",
"WED",
"Wed",
"thu",
"THU",
"Thu",
"fri",
"FRI",
"Fri",
"sat",
"SAT",
"Sat"
],
"type": [
"number",
"string"
]
},
"maxItems": 7,
"type": "array"
},
"hour": {
"description": "The hour(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every hour of the day or ``[0 ... 23]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"type": [
"number",
"string"
]
},
"maxItems": 24,
"type": "array"
},
"minute": {
"description": "The minute(s) of the day that the job will run. Allowed values are either ``[\"*\"]`` meaning every minute of the day or``[0 ... 59]``.",
"items": {
"enum": [
"*",
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59
],
"type": [
"number",
"string"
]
},
"maxItems": 60,
"type": "array"
},
"month": {
"description": "The month(s) of the year that the job will run. Allowed values are either ``[1 ... 12]`` or ``[\"*\"]`` for all months of the year. Strings, either 3-letter abbreviations or the full name of the month, can be used interchangeably (e.g., \"jan\" or \"october\"). Months that are not compatible with ``dayOfMonth`` are ignored, for example ``{\"dayOfMonth\": [31], \"month\":[\"feb\"]}``.",
"items": {
"enum": [
"*",
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
"january",
"JANUARY",
"January",
"february",
"FEBRUARY",
"February",
"march",
"MARCH",
"March",
"april",
"APRIL",
"April",
"may",
"MAY",
"May",
"june",
"JUNE",
"June",
"july",
"JULY",
"July",
"august",
"AUGUST",
"August",
"september",
"SEPTEMBER",
"September",
"october",
"OCTOBER",
"October",
"november",
"NOVEMBER",
"November",
"december",
"DECEMBER",
"December",
"jan",
"JAN",
"Jan",
"feb",
"FEB",
"Feb",
"mar",
"MAR",
"Mar",
"apr",
"APR",
"Apr",
"jun",
"JUN",
"Jun",
"jul",
"JUL",
"Jul",
"aug",
"AUG",
"Aug",
"sep",
"SEP",
"Sep",
"oct",
"OCT",
"Oct",
"nov",
"NOV",
"Nov",
"dec",
"DEC",
"Dec"
],
"type": [
"number",
"string"
]
},
"maxItems": 12,
"type": "array"
}
},
"required": [
"dayOfMonth",
"dayOfWeek",
"hour",
"minute",
"month"
],
"type": "object"
},
"scheduledJobId": {
"description": "ID of this scheduled job.",
"type": "string"
},
"status": {
"description": "Object containing status information about the scheduled job.",
"properties": {
"lastFailedRun": {
"description": "Date and time of the last failed run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"lastSuccessfulRun": {
"description": "Date and time of the last successful run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"nextRunTime": {
"description": "Date and time of the next run.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"queuePosition": {
"description": "Position of the job in the queue Job. The value will show 0 if the job is about to run, otherwise, the number will be greater than 0 if currently queued, or None if the job is not currently running.",
"minimum": 0,
"type": [
"integer",
"null"
]
},
"running": {
"description": "`true` or `false` depending on whether the job is currently running.",
"type": "boolean"
}
},
"required": [
"running"
],
"type": "object"
},
"typeId": {
"description": "Job type of the scheduled job",
"type": "string"
},
"updatedAt": {
"description": "Time of last modification",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"enabled",
"id",
"schedule",
"scheduledJobId",
"status",
"typeId"
],
"type": "object"
},
"maxItems": 100,
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"totalCount": {
"description": "The total number of items across all pages.",
"type": "integer"
},
"updatedAt": {
"description": "Time of last modification",
"format": "date-time",
"type": "string"
},
"updatedBy": {
"description": "User ID of last modifier",
"type": "string"
}
},
"required": [
"data",
"next",
"previous",
"totalCount"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
count |
integer |
false |
|
Number of items returned on this page. |
data |
[ScheduledJobResponse] |
true |
maxItems: 100
|
List of scheduled jobs |
next |
string,null(uri) |
true |
|
URL pointing to the next page (if null, there is no next page). |
previous |
string,null(uri) |
true |
|
URL pointing to the previous page (if null, there is no previous page). |
totalCount |
integer |
true |
|
The total number of items across all pages. |
updatedAt |
string(date-time) |
false |
|
Time of last modification |
updatedBy |
string |
false |
|
User ID of last modifier |
ServerSideEncryption
{
"description": "Configure Server-Side Encryption for S3 output",
"properties": {
"algorithm": {
"description": "The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms).",
"type": "string"
},
"customerAlgorithm": {
"description": "Specifies the algorithm to use to when encrypting the object (for example, AES256).",
"type": "string"
},
"customerKey": {
"description": "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as an base64 encoded string.",
"type": "string"
},
"kmsEncryptionContext": {
"description": "Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs.",
"type": "string"
},
"kmsKeyId": {
"description": "Specifies the ID of the symmetric customer managed key to use for object encryption.",
"type": "string"
}
},
"type": "object"
}
Configure Server-Side Encryption for S3 output
Properties
Name |
Type |
Required |
Restrictions |
Description |
algorithm |
string |
false |
|
The server-side encryption algorithm used when storing this object in Amazon S3 (for example, AES256, aws:kms). |
customerAlgorithm |
string |
false |
|
Specifies the algorithm to use when encrypting the object (for example, AES256). |
customerKey |
string |
false |
|
Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon S3 does not store the encryption key. The key must be appropriate for use with the algorithm specified in customerAlgorithm. The key must be sent as a base64-encoded string. |
kmsEncryptionContext |
string |
false |
|
Specifies the Amazon Web Services KMS Encryption Context to use for object encryption. The value of this header is a base64-encoded UTF-8 string holding JSON with the encryption context key-value pairs. |
kmsKeyId |
string |
false |
|
Specifies the ID of the symmetric customer managed key to use for object encryption. |
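For the customer-provided key variant (SSE-C), a ServerSideEncryption block could look like the sketch below; the key shown is a placeholder, and a real value must be the base64 encoding of a 256-bit key matching customerAlgorithm.
{
  "customerAlgorithm": "AES256",
  "customerKey": "PLACEHOLDER+BASE64+ENCODED+256+BIT+KEY+HERE="
}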
ShapWarning
{
"description": "A training prediction job",
"properties": {
"partitionName": {
"description": "The partition used for the prediction record.",
"type": "string"
},
"value": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
},
"required": [
"partitionName",
"value"
],
"type": "object"
}
SHAP additivity warnings for a partition of a training prediction job
Properties
Name |
Type |
Required |
Restrictions |
Description |
partitionName |
string |
true |
|
The partition used for the prediction record. |
value |
ShapWarningItems |
true |
|
The warnings related to this partition |
ShapWarningItems
{
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
The warnings related to this partition
Properties
Name |
Type |
Required |
Restrictions |
Description |
maxNormalizedMismatch |
number |
true |
|
The maximal relative normalized mismatch value |
mismatchRowCount |
integer |
true |
|
The count of rows for which additivity check failed |
ShapWarningValues
{
"description": "Mismatch information",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
Mismatch information
Properties
Name |
Type |
Required |
Restrictions |
Description |
maxNormalizedMismatch |
number |
true |
|
The maximal relative normalized mismatch value. |
mismatchRowCount |
integer |
true |
|
The count of rows for which additivity check failed. |
ShapWarnings
{
"description": "Will be present if `explanationAlgorithm` was set to `shap` and there were additivity failures during SHAP values calculation.",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value.",
"type": "number",
"x-versionadded": "v2.21"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed.",
"type": "integer",
"x-versionadded": "v2.21"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
Will be present if explanationAlgorithm
was set to shap
and there were additivity failures during SHAP values calculation.
Properties
Name |
Type |
Required |
Restrictions |
Description |
maxNormalizedMismatch |
number |
true |
|
The maximal relative normalized mismatch value. |
mismatchRowCount |
integer |
true |
|
The count of rows for which additivity check failed. |
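For example, a job in which three rows failed the SHAP additivity check might report a shapWarnings object such as the following (values are illustrative only):
{
  "maxNormalizedMismatch": 0.012,
  "mismatchRowCount": 3
}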
SnowflakeDataStreamer
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
}
Stream CSV data chunks from Snowflake
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
cloudStorageCredentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the cloud storage. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
cloudStorageType |
string |
false |
|
Type name for cloud storage |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the Snowflake data source. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataStoreId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
ID of the data store to connect to |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
externalStage |
string |
true |
|
External storage |
query |
string |
false |
|
A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of "table" and/or "schema" parameters exclusively. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
schema |
string |
false |
|
The name of the specified database schema to read input data from. |
table |
string |
false |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
cloudStorageType |
[azure , gcp , s3 ] |
anonymous |
[redacted] |
anonymous |
[redacted] |
type |
snowflake |
SnowflakeIntake
{
"description": "Stream CSV data chunks from Snowflake",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to read input data from.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"type"
],
"type": "object"
}
Stream CSV data chunks from Snowflake
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to read input data from. |
cloudStorageCredentialId |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the cloud storage. |
cloudStorageType |
string |
false |
|
Type name for cloud storage |
credentialId |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the Snowflake data source. |
dataStoreId |
string |
true |
|
ID of the data store to connect to |
externalStage |
string |
true |
|
External storage |
query |
string |
false |
|
A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of "table" and/or "schema" parameters exclusively. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
schema |
string |
false |
|
The name of the specified database schema to read input data from. |
table |
string |
false |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
cloudStorageType |
[azure , gcp , s3 ] |
type |
snowflake |
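A sketch of a snowflake intake configuration conforming to this schema is shown below. The data store ID, credential IDs, stage, schema, and table names are all placeholders.
{
  "type": "snowflake",
  "dataStoreId": "5e4bc5b35e6e763beb9db14a",
  "credentialId": "5e4bc5b35e6e763beb9db14b",
  "externalStage": "my_s3_stage",
  "cloudStorageType": "s3",
  "cloudStorageCredentialId": "5e4bc5b35e6e763beb9db14c",
  "schema": "PUBLIC",
  "table": "SCORING_DATA"
}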
SnowflakeKeyPairCredentials
{
"properties": {
"configId": {
"description": "The ID of the saved shared credentials. If specified, cannot include user, privateKeyStr or passphrase.",
"type": "string"
},
"credentialType": {
"description": "The type of these credentials, 'snowflake_key_pair_user_account' here.",
"enum": [
"snowflake_key_pair_user_account"
],
"type": "string"
},
"passphrase": {
"description": "Optional passphrase to decrypt private key. Cannot include this parameter if configId is specified.",
"type": "string"
},
"privateKeyStr": {
"description": "Private key for key pair authentication. Required if configId is not specified. Cannot include this parameter if configId is specified.",
"type": "string"
},
"user": {
"description": "Username for this credential. Required if configId is not specified. Cannot include this parameter if configId is specified.",
"type": "string"
}
},
"required": [
"credentialType"
],
"type": "object"
}
Properties
Name |
Type |
Required |
Restrictions |
Description |
configId |
string |
false |
|
The ID of the saved shared credentials. If specified, cannot include user, privateKeyStr or passphrase. |
credentialType |
string |
true |
|
The type of these credentials, 'snowflake_key_pair_user_account' here. |
passphrase |
string |
false |
|
Optional passphrase to decrypt private key. Cannot include this parameter if configId is specified. |
privateKeyStr |
string |
false |
|
Private key for key pair authentication. Required if configId is not specified. Cannot include this parameter if configId is specified. |
user |
string |
false |
|
Username for this credential. Required if configId is not specified. Cannot include this parameter if configId is specified. |
Enumerated Values
Property |
Value |
credentialType |
snowflake_key_pair_user_account |
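As an example, key pair credentials that reference saved shared credentials could be supplied as below; the configId is a placeholder. Alternatively, omit configId and pass user and privateKeyStr (plus an optional passphrase) directly.
{
  "credentialType": "snowflake_key_pair_user_account",
  "configId": "5e4bc5b35e6e763beb9db14d"
}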
SnowflakeOutput
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks to Snowflake in bulk
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to write output data to. |
cloudStorageCredentialId |
string,null |
false |
|
The ID of the credential holding information about a user with write access to the cloud storage. |
cloudStorageType |
string |
false |
|
Type name for cloud storage |
createTableIfNotExists |
boolean |
false |
|
Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the statementType parameter. |
credentialId |
string,null |
false |
|
The ID of the credential holding information about a user with write access to the Snowflake data source. |
dataStoreId |
string |
true |
|
ID of the data store to connect to |
externalStage |
string |
true |
|
External storage |
schema |
string |
false |
|
The name of the specified database schema to write results to. |
statementType |
string |
true |
|
The statement type to use when writing the results. |
table |
string |
true |
|
The name of the specified database table to write results to. |
type |
string |
true |
|
Type name for this output type |
Enumerated Values
Property |
Value |
cloudStorageType |
[azure , gcp , s3 ] |
statementType |
[insert , create_table , createTable ] |
type |
snowflake |
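A minimal snowflake output configuration might look like the following sketch; the identifiers, stage, schema, and table names are placeholders.
{
  "type": "snowflake",
  "dataStoreId": "5e4bc5b35e6e763beb9db14a",
  "credentialId": "5e4bc5b35e6e763beb9db14b",
  "externalStage": "my_s3_stage",
  "schema": "PUBLIC",
  "table": "SCORING_RESULTS",
  "statementType": "insert",
  "createTableIfNotExists": true
}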
SnowflakeOutputAdaptor
{
"description": "Save CSV data chunks to Snowflake in bulk",
"properties": {
"catalog": {
"description": "The name of the specified database catalog to write output data to.",
"type": "string",
"x-versionadded": "v2.28"
},
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"cloudStorageType": {
"default": "s3",
"description": "Type name for cloud storage",
"enum": [
"azure",
"gcp",
"s3"
],
"type": "string"
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the Snowflake data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalStage": {
"description": "External storage",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"snowflake"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalStage",
"statementType",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks to Snowflake in bulk
Properties
Name |
Type |
Required |
Restrictions |
Description |
catalog |
string |
false |
|
The name of the specified database catalog to write output data to. |
cloudStorageCredentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the credential holding information about a user with write access to the cloud storage. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
cloudStorageType |
string |
false |
|
Type name for cloud storage |
createTableIfNotExists |
boolean |
false |
|
Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the statementType parameter. |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the credential holding information about a user with write access to the Snowflake data source. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataStoreId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
ID of the data store to connect to |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
externalStage |
string |
true |
|
External storage |
schema |
string |
false |
|
The name of the specified database schema to write results to. |
statementType |
string |
true |
|
The statement type to use when writing the results. |
table |
string |
true |
|
The name of the specified database table to write results to. |
type |
string |
true |
|
Type name for this output type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
cloudStorageType |
[azure , gcp , s3 ] |
anonymous |
[redacted] |
anonymous |
[redacted] |
statementType |
[insert , create_table , createTable ] |
type |
snowflake |
SynapseDataStreamer
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
Stream CSV data chunks from Azure Synapse
Properties
Name |
Type |
Required |
Restrictions |
Description |
cloudStorageCredentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the Azure credential holding information about a user with read access to the cloud storage. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
credentialId |
any |
false |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the JDBC data source. |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
dataStoreId |
any |
true |
|
Either the populated value of the field or [redacted] due to permission settings |
oneOf
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
ID of the data store to connect to |
xor
Name |
Type |
Required |
Restrictions |
Description |
» anonymous |
string |
false |
|
none |
continued
Name |
Type |
Required |
Restrictions |
Description |
externalDataSource |
string |
true |
|
External datasource name |
query |
string |
false |
|
A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of "table" and/or "schema" parameters exclusively. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
schema |
string |
false |
|
The name of the specified database schema to read input data from. |
table |
string |
false |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
anonymous |
[redacted] |
anonymous |
[redacted] |
anonymous |
[redacted] |
type |
synapse |
SynapseIntake
{
"description": "Stream CSV data chunks from Azure Synapse",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the Azure credential holding information about a user with read access to the cloud storage.",
"type": [
"string",
"null"
]
},
"credentialId": {
"description": "The ID of the credential holding information about a user with read access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External datasource name",
"type": "string"
},
"query": {
"description": "A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of \"table\" and/or \"schema\" parameters exclusively.If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }}",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to read input data from.",
"type": "string"
},
"table": {
"description": "The name of the specified database table to read input data from.",
"type": "string"
},
"type": {
"description": "Type name for this intake type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"type"
],
"type": "object"
}
Stream CSV data chunks from Azure Synapse
Properties
Name |
Type |
Required |
Restrictions |
Description |
cloudStorageCredentialId |
string,null |
false |
|
The ID of the Azure credential holding information about a user with read access to the cloud storage. |
credentialId |
string,null |
false |
|
The ID of the credential holding information about a user with read access to the JDBC data source. |
dataStoreId |
string |
true |
|
ID of the data store to connect to |
externalDataSource |
string |
true |
|
External datasource name |
query |
string |
false |
|
A self-supplied SELECT statement of the dataset you wish to score. Helpful for supplying a more fine-grained selection of data not achievable through specification of "table" and/or "schema" parameters exclusively. If this job is executed with a job definition, then template variables are available which will be substituted for timestamps: {{ current_run_timestamp }}, {{ last_completed_run_time }}, {{ last_scheduled_run_time }}, {{ next_scheduled_run_time }}, {{ current_run_time }} |
schema |
string |
false |
|
The name of the specified database schema to read input data from. |
table |
string |
false |
|
The name of the specified database table to read input data from. |
type |
string |
true |
|
Type name for this intake type |
Enumerated Values
Property |
Value |
type |
synapse |
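For illustration, a synapse intake configuration conforming to this schema could look like this; the IDs, external data source name, and query are placeholders.
{
  "type": "synapse",
  "dataStoreId": "5e4bc5b35e6e763beb9db14e",
  "credentialId": "5e4bc5b35e6e763beb9db14f",
  "cloudStorageCredentialId": "5e4bc5b35e6e763beb9db150",
  "externalDataSource": "my_external_data_source",
  "query": "SELECT * FROM scoring.input_data"
}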
SynapseOutput
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
"dataStoreId": {
"description": "ID of the data store to connect to",
"type": "string"
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks to Azure Synapse in bulk

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| cloudStorageCredentialId | string,null | false | | The ID of the credential holding information about a user with write access to the cloud storage. |
| createTableIfNotExists | boolean | false | | Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the statementType parameter. |
| credentialId | string,null | false | | The ID of the credential holding information about a user with write access to the JDBC data source. |
| dataStoreId | string | true | | ID of the data store to connect to |
| externalDataSource | string | true | | External data source name |
| schema | string | false | | The name of the specified database schema to write results to. |
| statementType | string | true | | The statement type to use when writing the results. |
| table | string | true | | The name of the specified database table to write results to. |
| type | string | true | | Type name for this output type |

Enumerated Values

| Property | Value |
|---|---|
| statementType | [insert, create_table, createTable] |
| type | synapse |
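A companion sketch for the output side: the dictionary below fills in the required fields from the schema above, with placeholder IDs and table names. Per the createTableIfNotExists description, pairing it with statementType "insert" should let the first run create the target table and later runs append rows to it.

```python
# A minimal sketch of a "synapse" output payload matching the schema above.
# IDs, schema, and table names are placeholders for illustration only.
output_settings = {
    "type": "synapse",
    "dataStoreId": "5f0f0f0f0f0f0f0f0f0f0f0f",        # placeholder data store ID
    "externalDataSource": "my_external_data_source",  # placeholder external data source
    "schema": "dbo",                                  # placeholder schema name
    "table": "scoring_output",                        # placeholder table name
    "statementType": "insert",           # one of: insert, create_table, createTable
    "createTableIfNotExists": True,      # create the table first if it is missing
    "credentialId": "5f0f0f0f0f0f0f0f0f0f0f10",       # optional JDBC write credential
}

# Required keys per the schema: dataStoreId, externalDataSource, statementType,
# table, and type. Everything else here is optional.
required = {"dataStoreId", "externalDataSource", "statementType", "table", "type"}
missing = required - output_settings.keys()
assert not missing, f"missing required output fields: {missing}"
```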
SynapseOutputAdaptor
{
"description": "Save CSV data chunks to Azure Synapse in bulk",
"properties": {
"cloudStorageCredentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the cloud storage.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"createTableIfNotExists": {
"default": false,
"description": "Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the `statementType` parameter.",
"type": "boolean",
"x-versionadded": "v2.25"
},
"credentialId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "The ID of the credential holding information about a user with write access to the JDBC data source.",
"type": [
"string",
"null"
]
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"dataStoreId": {
"description": "Either the populated value of the field or [redacted] due to permission settings",
"oneOf": [
{
"description": "ID of the data store to connect to",
"type": "string"
},
{
"enum": [
"[redacted]"
],
"type": "string"
}
]
},
"externalDataSource": {
"description": "External data source name",
"type": "string"
},
"schema": {
"description": "The name of the specified database schema to write results to.",
"type": "string"
},
"statementType": {
"description": "The statement type to use when writing the results.",
"enum": [
"insert",
"create_table",
"createTable"
],
"type": "string"
},
"table": {
"description": "The name of the specified database table to write results to.",
"type": "string"
},
"type": {
"description": "Type name for this output type",
"enum": [
"synapse"
],
"type": "string"
}
},
"required": [
"dataStoreId",
"externalDataSource",
"statementType",
"table",
"type"
],
"type": "object"
}
Save CSV data chunks to Azure Synapse in bulk

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| cloudStorageCredentialId | any | false | | Either the populated value of the field or [redacted] due to permission settings |
| » anonymous (oneOf) | string,null | false | | The ID of the credential holding information about a user with write access to the cloud storage. |
| » anonymous (xor) | string | false | | The literal string [redacted], returned when the caller lacks permission to view the field. |
| createTableIfNotExists | boolean | false | | Attempt to create the table first if no existing one is detected, before writing data with the strategy defined in the statementType parameter. |
| credentialId | any | false | | Either the populated value of the field or [redacted] due to permission settings |
| » anonymous (oneOf) | string,null | false | | The ID of the credential holding information about a user with write access to the JDBC data source. |
| » anonymous (xor) | string | false | | The literal string [redacted], returned when the caller lacks permission to view the field. |
| dataStoreId | any | true | | Either the populated value of the field or [redacted] due to permission settings |
| » anonymous (oneOf) | string | false | | ID of the data store to connect to |
| » anonymous (xor) | string | false | | The literal string [redacted], returned when the caller lacks permission to view the field. |
| externalDataSource | string | true | | External data source name |
| schema | string | false | | The name of the specified database schema to write results to. |
| statementType | string | true | | The statement type to use when writing the results. |
| table | string | true | | The name of the specified database table to write results to. |
| type | string | true | | Type name for this output type |

Enumerated Values

| Property | Value |
|---|---|
| anonymous | [redacted] |
| statementType | [insert, create_table, createTable] |
| type | synapse |
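The adaptor variant above describes what comes back when reading a stored job or definition: each credential or data store field carries either its real value or the literal string [redacted] when permission settings hide it. A small sketch of guarding against that case follows; the payload values are illustrative.

```python
# A sketch of handling the adaptor variant above, where credential and data
# store fields may be returned as the literal string "[redacted]" when the
# caller lacks permission to view them. The payload below is illustrative.
REDACTED = "[redacted]"

def visible_or_none(value):
    """Return the field value, or None when it is hidden or absent."""
    return None if value in (None, REDACTED) else value

stored_output = {
    "type": "synapse",
    "dataStoreId": REDACTED,                          # hidden by permission settings
    "externalDataSource": "my_external_data_source",
    "table": "scoring_output",
    "statementType": "insert",
    "credentialId": REDACTED,                         # hidden by permission settings
}

data_store_id = visible_or_none(stored_output.get("dataStoreId"))
if data_store_id is None:
    print("dataStoreId is not visible to this user; ask the definition owner.")
```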
TrainingPredictionsListResponse
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "A list of training prediction jobs",
"items": {
"description": "A training prediction job",
"properties": {
"dataSubset": {
"description": "Subset of data predicted on",
"enum": [
"all",
"validationAndHoldout",
"holdout",
"allBacktests",
"validation",
"crossValidation"
],
"type": "string",
"x-enum-versionadded": [
{
"value": "validation",
"x-versionadded": "v2.21"
}
]
},
"explanationAlgorithm": {
"description": "The method used for calculating prediction explanations",
"enum": [
"shap"
],
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"id": {
"description": "ID of the training prediction job",
"type": "string"
},
"maxExplanations": {
"description": "the number of top contributors that are included in prediction explanations. Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns",
"maximum": 100,
"minimum": 0,
"type": [
"integer",
"null"
]
},
"modelId": {
"description": "ID of the model",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if \"explanationAlgorithm\" was set to \"shap\" and there were additivity failures during SHAP values calculation",
"items": {
"description": "A training prediction job",
"properties": {
"partitionName": {
"description": "The partition used for the prediction record.",
"type": "string"
},
"value": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
},
"required": [
"partitionName",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"url": {
"description": "The location of these predictions",
"format": "uri",
"type": "string"
}
},
"required": [
"dataSubset",
"id",
"modelId",
"url"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"data",
"next",
"previous"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| count | integer | false | | Number of items returned on this page. |
| data | [TraningPredictions] | true | | A list of training prediction jobs |
| next | string,null(uri) | true | | URL pointing to the next page (if null, there is no next page). |
| previous | string,null(uri) | true | | URL pointing to the previous page (if null, there is no previous page). |
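A sketch of traversing the count/data/next/previous envelope above. Only the envelope fields come from this schema; `fetch_page` is a stand-in for whatever authenticated HTTP helper you use, and the two in-memory pages exist purely to show the traversal.

```python
# A sketch of walking the paginated list envelope above. Only the
# count/data/next/previous fields come from the schema; fetch_page is a
# stand-in for an authenticated GET helper, and the pages are illustrative.
def iter_all_jobs(first_url, fetch_page):
    """Yield every item in `data`, following `next` links until they are null."""
    url = first_url
    while url:
        page = fetch_page(url)
        yield from page["data"]      # items follow the TraningPredictions schema
        url = page["next"]           # None (null) means there is no next page

# Two tiny in-memory pages standing in for real responses.
pages = {
    "page-1": {"count": 1, "previous": None, "next": "page-2",
               "data": [{"id": "job-1", "modelId": "model-1",
                         "dataSubset": "holdout", "url": "https://example.com/1/"}]},
    "page-2": {"count": 1, "previous": "page-1", "next": None,
               "data": [{"id": "job-2", "modelId": "model-1",
                         "dataSubset": "validation", "url": "https://example.com/2/"}]},
}

for job in iter_all_jobs("page-1", pages.get):
    print(job["id"], job["dataSubset"], job["url"])
```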
TrainingPredictionsRetrieveResponse
{
"properties": {
"count": {
"description": "Number of items returned on this page.",
"type": "integer"
},
"data": {
"description": "A list of training prediction rows",
"items": {
"description": "A training prediction row",
"properties": {
"forecastDistance": {
"description": "(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column.",
"type": [
"integer",
"null"
]
},
"forecastPoint": {
"description": "(if time series project) The forecastPoint of the predictions. Either provided or inferred.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"partitionId": {
"description": "The partition used for the prediction record",
"type": "string"
},
"prediction": {
"description": "The prediction of the model.",
"oneOf": [
{
"description": "If using a regressor model, will be the numeric value of the target.",
"type": "number"
},
{
"description": "If using a binary or muliclass classifier model, will be the predicted class.",
"type": "string"
},
{
"description": "If using a multilabel classifier model, will be a list of predicted classes.",
"items": {
"type": "string"
},
"type": "array"
}
]
},
"predictionExplanations": {
"description": "Array contains `predictionExplanation` objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if `explanationAlgorithm` is not null (prediction explanations were requested).",
"items": {
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionValues": {
"description": "A list of predicted values for this row.",
"items": {
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "The row in the prediction dataset this prediction corresponds to.",
"minimum": 0,
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN.",
"type": [
"string",
"null"
]
},
"shapMetadata": {
"description": "The additional information necessary to understand shap based prediction explanations. Only present if explanationAlgorithm=\"shap\" was added in compute request.",
"properties": {
"shapBaseValue": {
"description": "The model's average prediction over the training data. SHAP values are deviations from the base value.",
"type": "number"
},
"shapRemainingTotal": {
"description": "The total of SHAP values for features beyond the maxExplanations. This can be identically 0 in all rows, if maxExplanations is greater than the number of features and thus all features are returned.",
"type": "integer"
},
"warnings": {
"description": "SHAP values calculation warnings",
"items": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"type": "array"
}
},
"required": [
"shapBaseValue",
"shapRemainingTotal",
"warnings"
],
"type": "object"
},
"timestamp": {
"description": "(if time series project) The timestamp of this row in the prediction dataset.",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"partitionId",
"prediction",
"rowId"
],
"type": "object"
},
"type": "array"
},
"next": {
"description": "URL pointing to the next page (if null, there is no next page).",
"format": "uri",
"type": [
"string",
"null"
]
},
"previous": {
"description": "URL pointing to the previous page (if null, there is no previous page).",
"format": "uri",
"type": [
"string",
"null"
]
}
},
"required": [
"data",
"next",
"previous"
],
"type": "object"
}
Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| count | integer | false | | Number of items returned on this page. |
| data | [TraningPredictionRow] | true | | A list of training prediction rows |
| next | string,null(uri) | true | | URL pointing to the next page (if null, there is no next page). |
| previous | string,null(uri) | true | | URL pointing to the previous page (if null, there is no previous page). |
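Because `prediction` is a oneOf (a number for regression, a string for binary or multiclass classification, a list of strings for multilabel), consumers of this response need a small type check per row. A sketch, with a single illustrative row standing in for a real page:

```python
# A sketch of consuming one page of the retrieve response above. The
# `prediction` field may be a number, a string, or a list of strings,
# per the oneOf in the schema; the page below is illustrative.
def describe_row(row):
    pred = row["prediction"]
    if isinstance(pred, list):
        kind = "multilabel classes"      # multilabel classifier
    elif isinstance(pred, str):
        kind = "predicted class"         # binary or multiclass classifier
    else:
        kind = "predicted value"         # regression
    return f"row {row['rowId']} ({row['partitionId']}): {kind} = {pred}"

page = {
    "count": 1,
    "next": None,
    "previous": None,
    "data": [{"rowId": 0, "partitionId": "0.0", "prediction": 42.7}],
}

for row in page["data"]:
    print(describe_row(row))
```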
TraningPredictionRow
{
"description": "A training prediction row",
"properties": {
"forecastDistance": {
"description": "(if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column.",
"type": [
"integer",
"null"
]
},
"forecastPoint": {
"description": "(if time series project) The forecastPoint of the predictions. Either provided or inferred.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"partitionId": {
"description": "The partition used for the prediction record",
"type": "string"
},
"prediction": {
"description": "The prediction of the model.",
"oneOf": [
{
"description": "If using a regressor model, will be the numeric value of the target.",
"type": "number"
},
{
"description": "If using a binary or muliclass classifier model, will be the predicted class.",
"type": "string"
},
{
"description": "If using a multilabel classifier model, will be a list of predicted classes.",
"items": {
"type": "string"
},
"type": "array"
}
]
},
"predictionExplanations": {
"description": "Array contains `predictionExplanation` objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if `explanationAlgorithm` is not null (prediction explanations were requested).",
"items": {
"description": "Prediction explanation result.",
"properties": {
"feature": {
"description": "The name of the feature contributing to the prediction.",
"type": "string"
},
"featureValue": {
"description": "The value the feature took on for this row. The type corresponds to the feature (bool, int, float, str, etc.).",
"oneOf": [
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "string"
},
{
"type": "number"
}
]
},
"label": {
"description": "Describes what output was driven by this prediction explanation. For regression projects, it is the name of the target feature. For classification projects, it is the class whose probability increasing would correspond to a positive strength of this prediction explanation. For predictions made using anomaly detection models, it is the `Anomaly Score`.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"strength": {
"description": "Algorithm-specific explanation value attributed to `feature` in this row. If `explanationAlgorithm` = `shap`, this is the SHAP value.",
"type": [
"number",
"null"
]
}
},
"required": [
"feature",
"featureValue",
"label"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"predictionThreshold": {
"description": "Threshold used for binary classification in predictions.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"predictionValues": {
"description": "A list of predicted values for this row.",
"items": {
"description": "Predicted values",
"properties": {
"label": {
"description": "For regression problems this will be the name of the target column, 'Anomaly score' or ignored field. For classification projects this will be the name of the class.",
"oneOf": [
{
"type": "string"
},
{
"type": "number"
}
]
},
"threshold": {
"description": "Threshold used in multilabel classification for this class.",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"value": {
"description": "The predicted probability of the class identified by the label.",
"type": "number"
}
},
"required": [
"label",
"value"
],
"type": "object"
},
"type": "array"
},
"rowId": {
"description": "The row in the prediction dataset this prediction corresponds to.",
"minimum": 0,
"type": "integer"
},
"seriesId": {
"description": "The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN.",
"type": [
"string",
"null"
]
},
"shapMetadata": {
"description": "The additional information necessary to understand shap based prediction explanations. Only present if explanationAlgorithm=\"shap\" was added in compute request.",
"properties": {
"shapBaseValue": {
"description": "The model's average prediction over the training data. SHAP values are deviations from the base value.",
"type": "number"
},
"shapRemainingTotal": {
"description": "The total of SHAP values for features beyond the maxExplanations. This can be identically 0 in all rows, if maxExplanations is greater than the number of features and thus all features are returned.",
"type": "integer"
},
"warnings": {
"description": "SHAP values calculation warnings",
"items": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"type": "array"
}
},
"required": [
"shapBaseValue",
"shapRemainingTotal",
"warnings"
],
"type": "object"
},
"timestamp": {
"description": "(if time series project) The timestamp of this row in the prediction dataset.",
"format": "date-time",
"type": [
"string",
"null"
]
}
},
"required": [
"partitionId",
"prediction",
"rowId"
],
"type": "object"
}
A training prediction row

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| forecastDistance | integer,null | false | | (if time series project) The number of time units this prediction is away from the forecastPoint. The unit of time is determined by the timeUnit of the datetime partition column. |
| forecastPoint | string,null(date-time) | false | | (if time series project) The forecastPoint of the predictions. Either provided or inferred. |
| partitionId | string | true | | The partition used for the prediction record |
| prediction | any | true | | The prediction of the model. |
| » anonymous (oneOf) | number | false | | If using a regressor model, will be the numeric value of the target. |
| » anonymous (xor) | string | false | | If using a binary or multiclass classifier model, will be the predicted class. |
| » anonymous (xor) | [string] | false | | If using a multilabel classifier model, will be a list of predicted classes. |
| predictionExplanations | [PredictionExplanationsObject] | false | | Array contains predictionExplanation objects. The total elements in the array are bounded by maxExplanations and feature count. It will be present only if explanationAlgorithm is not null (prediction explanations were requested). |
| predictionThreshold | number | false | maximum: 1, minimum: 0 | Threshold used for binary classification in predictions. |
| predictionValues | [PredictionArrayObjectValues] | false | | A list of predicted values for this row. |
| rowId | integer | true | minimum: 0 | The row in the prediction dataset this prediction corresponds to. |
| seriesId | string,null | false | | The ID of the series value for a multiseries project. For time series projects that are not a multiseries this will be a NaN. |
| shapMetadata | TraningPredictionShapMetadata | false | | The additional information necessary to understand shap based prediction explanations. Only present if explanationAlgorithm="shap" was added in compute request. |
| timestamp | string,null(date-time) | false | | (if time series project) The timestamp of this row in the prediction dataset. |
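Note that predictionExplanations and shapMetadata are present only when explanations were requested, so treat them as optional. Below is a sketch of ranking a row's explanations by absolute strength; the row itself is illustrative.

```python
# A sketch of reading the optional explanation data off a single row. Only
# rows requested with a non-null explanationAlgorithm carry
# predictionExplanations, per the schema above. The row below is illustrative.
def top_explanations(row, n=3):
    """Return up to n (feature, strength) pairs, strongest first, if present."""
    explanations = row.get("predictionExplanations") or []
    ranked = sorted(
        explanations,
        key=lambda e: abs(e["strength"]) if e.get("strength") is not None else 0.0,
        reverse=True,
    )
    return [(e["feature"], e.get("strength")) for e in ranked[:n]]

row = {
    "rowId": 7,
    "partitionId": "holdout",
    "prediction": "yes",
    "predictionExplanations": [
        {"feature": "age", "featureValue": 54, "label": "yes", "strength": 0.31},
        {"feature": "income", "featureValue": 72000, "label": "yes", "strength": -0.12},
    ],
}

print(top_explanations(row))    # [('age', 0.31), ('income', -0.12)]
```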
TraningPredictionShapMetadata
{
"description": "The additional information necessary to understand shap based prediction explanations. Only present if explanationAlgorithm=\"shap\" was added in compute request.",
"properties": {
"shapBaseValue": {
"description": "The model's average prediction over the training data. SHAP values are deviations from the base value.",
"type": "number"
},
"shapRemainingTotal": {
"description": "The total of SHAP values for features beyond the maxExplanations. This can be identically 0 in all rows, if maxExplanations is greater than the number of features and thus all features are returned.",
"type": "integer"
},
"warnings": {
"description": "SHAP values calculation warnings",
"items": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
},
"type": "array"
}
},
"required": [
"shapBaseValue",
"shapRemainingTotal",
"warnings"
],
"type": "object"
}
The additional information necessary to understand shap based prediction explanations. Only present if explanationAlgorithm="shap" was added in compute request.

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| shapBaseValue | number | true | | The model's average prediction over the training data. SHAP values are deviations from the base value. |
| shapRemainingTotal | integer | true | | The total of SHAP values for features beyond the maxExplanations. This can be identically 0 in all rows, if maxExplanations is greater than the number of features and thus all features are returned. |
| warnings | [ShapWarningItems] | true | | SHAP values calculation warnings |
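A sketch of turning these three fields into a short log message; the values passed in are illustrative.

```python
# A sketch of summarizing the shapMetadata fields above for logging.
def summarize_shap(shap_metadata):
    lines = [
        f"base value: {shap_metadata['shapBaseValue']}",
        f"SHAP total beyond maxExplanations: {shap_metadata['shapRemainingTotal']}",
    ]
    for warning in shap_metadata["warnings"]:
        lines.append(
            f"additivity check failed for {warning['mismatchRowCount']} rows "
            f"(max normalized mismatch {warning['maxNormalizedMismatch']})"
        )
    return "\n".join(lines)

# Illustrative metadata with one additivity warning.
print(summarize_shap({
    "shapBaseValue": 0.18,
    "shapRemainingTotal": 0,
    "warnings": [{"maxNormalizedMismatch": 0.02, "mismatchRowCount": 3}],
}))
```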
TraningPredictions
{
"description": "A training prediction job",
"properties": {
"dataSubset": {
"description": "Subset of data predicted on",
"enum": [
"all",
"validationAndHoldout",
"holdout",
"allBacktests",
"validation",
"crossValidation"
],
"type": "string",
"x-enum-versionadded": [
{
"value": "validation",
"x-versionadded": "v2.21"
}
]
},
"explanationAlgorithm": {
"description": "The method used for calculating prediction explanations",
"enum": [
"shap"
],
"type": [
"string",
"null"
],
"x-versionadded": "v2.21"
},
"id": {
"description": "ID of the training prediction job",
"type": "string"
},
"maxExplanations": {
"description": "the number of top contributors that are included in prediction explanations. Defaults to null for datasets narrower than 100 columns, defaults to 100 for datasets wider than 100 columns",
"maximum": 100,
"minimum": 0,
"type": [
"integer",
"null"
]
},
"modelId": {
"description": "ID of the model",
"type": "string"
},
"shapWarnings": {
"description": "Will be present if \"explanationAlgorithm\" was set to \"shap\" and there were additivity failures during SHAP values calculation",
"items": {
"description": "A training prediction job",
"properties": {
"partitionName": {
"description": "The partition used for the prediction record.",
"type": "string"
},
"value": {
"description": "The warnings related to this partition",
"properties": {
"maxNormalizedMismatch": {
"description": "The maximal relative normalized mismatch value",
"type": "number"
},
"mismatchRowCount": {
"description": "The count of rows for which additivity check failed",
"type": "integer"
}
},
"required": [
"maxNormalizedMismatch",
"mismatchRowCount"
],
"type": "object"
}
},
"required": [
"partitionName",
"value"
],
"type": "object"
},
"type": "array",
"x-versionadded": "v2.21"
},
"url": {
"description": "The location of these predictions",
"format": "uri",
"type": "string"
}
},
"required": [
"dataSubset",
"id",
"modelId",
"url"
],
"type": "object"
}
A training prediction job

Properties

| Name | Type | Required | Restrictions | Description |
|---|---|---|---|---|
| dataSubset | string | true | | Subset of data predicted on |
| explanationAlgorithm | string,null | false | | The method used for calculating prediction explanations |
| id | string | true | | ID of the training prediction job |
| maxExplanations | integer,null | false | maximum: 100, minimum: 0 | The number of top contributors that are included in prediction explanations. Defaults to null for datasets narrower than 100 columns and to 100 for datasets wider than 100 columns. |
| modelId | string | true | | ID of the model |
| shapWarnings | [ShapWarning] | false | | Will be present if "explanationAlgorithm" was set to "shap" and there were additivity failures during SHAP values calculation |
| url | string(uri) | true | | The location of these predictions |

Enumerated Values

| Property | Value |
|---|---|
| dataSubset | [all, validationAndHoldout, holdout, allBacktests, validation, crossValidation] |
| explanationAlgorithm | shap |
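Finally, a sketch of filtering job records shaped like this schema, for example to find the SHAP-enabled runs for a given model. The records below are illustrative; in practice they come from the paginated list response shown earlier, and each `url` points at the corresponding prediction rows.

```python
# A sketch of filtering training prediction job records shaped like the
# schema above. The records are illustrative placeholders.
jobs = [
    {"id": "job-1", "modelId": "model-1", "dataSubset": "holdout",
     "explanationAlgorithm": None, "url": "https://example.com/predictions/1/"},
    {"id": "job-2", "modelId": "model-1", "dataSubset": "crossValidation",
     "explanationAlgorithm": "shap", "maxExplanations": 10,
     "url": "https://example.com/predictions/2/"},
]

# Pick out the jobs that were computed with SHAP explanations for model-1.
shap_jobs = [
    job for job in jobs
    if job["modelId"] == "model-1" and job.get("explanationAlgorithm") == "shap"
]
for job in shap_jobs:
    print(f"{job['id']}: {job['dataSubset']} predictions with up to "
          f"{job.get('maxExplanations')} explanations at {job['url']}")
```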