{
"type": "object",
"properties": {
"PCID": {
"type": "string",
"description": "Pink Connect ID for the authenticated connection"
},
"id": {
"type": "string",
"description": "ID of the ingest pipeline to create or update."
},
"master_timeout": {
"type": "string",
"description": "Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error."
},
"timeout": {
"type": "string",
"description": "Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error."
},
"if_version": {
"type": "number",
"description": "Required version for optimistic concurrency control for pipeline updates"
},
"_meta": {
"description": "Optional metadata about the ingest pipeline. May have any contents. This map is not automatically generated by Elasticsearch."
},
"deprecated": {
"type": "boolean",
"description": "Marks this ingest pipeline as deprecated. When a deprecated ingest pipeline is referenced as the default or final pipeline when creating or updating a non-deprecated index template, Elasticsearch will emit a deprecation warning."
},
"description": {
"type": "string",
"description": "Description of the ingest pipeline."
},
"field_access_pattern": {
"description": "Controls how processors in this pipeline should read and write data on a document's source."
},
"on_failure": {
"type": "array",
"items": {
"type": "object",
"properties": {
"append": {
"description": "Appends one or more values to an existing array if the field already exists and it is an array. Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar. Creates an array containing the provided values if the field doesn’t exist. Accepts a single value or an array of values."
},
"attachment": {
"description": "The attachment processor lets Elasticsearch extract file attachments in common formats (such as PPT, XLS, and PDF) by using the Apache text extraction library Tika."
},
"bytes": {
"description": "Converts a human readable byte value (for example `1kb`) to its value in bytes (for example `1024`). If the field is an array of strings, all members of the array will be converted. Supported human readable units are \"b\", \"kb\", \"mb\", \"gb\", \"tb\", \"pb\" case insensitive. An error will occur if the field is not a supported format or resultant value exceeds 2^63."
},
"cef": {
"description": "Converts a CEF message into a structured format."
},
"circle": {
"description": "Converts circle definitions of shapes to regular polygons which approximate them."
},
"community_id": {
"description": "Computes the Community ID for network flow data as defined in the Community ID Specification. You can use a community ID to correlate network events related to a single flow."
},
"convert": {
"description": "Converts a field in the currently ingested document to a different type, such as converting a string to an integer. If the field value is an array, all members will be converted."
},
"csv": {
"description": "Extracts fields from CSV line out of a single text field within a document. Any empty field in CSV will be skipped."
},
"date": {
"description": "Parses dates from fields, and then uses the date or timestamp as the timestamp for the document."
},
"date_index_name": {
"description": "The purpose of this processor is to point documents to the right time based index based on a date or timestamp field in a document by using the date math index name support."
},
"dissect": {
"description": "Extracts structured fields out of a single text field by matching the text field against a delimiter-based pattern."
},
"dot_expander": {
"description": "Expands a field with dots into an object field. This processor allows fields with dots in the name to be accessible by other processors in the pipeline. Otherwise these fields can’t be accessed by any processor."
},
"drop": {
"description": "Drops the document without raising any errors. This is useful to prevent the document from getting indexed based on some condition."
},
"enrich": {
"description": "The `enrich` processor can enrich documents with data from another index."
},
"fail": {
"description": "Raises an exception. This is useful for when you expect a pipeline to fail and want to relay a specific message to the requester."
},
"fingerprint": {
"description": "Computes a hash of the document’s content. You can use this hash for content fingerprinting."
},
"foreach": {
"description": "Runs an ingest processor on each element of an array or object."
},
"ip_location": {
"description": "Currently an undocumented alias for GeoIP Processor."
},
"geo_grid": {
"description": "Converts geo-grid definitions of grid tiles or cells to regular bounding boxes or polygons which describe their shape. This is useful if there is a need to interact with the tile shapes as spatially indexable fields."
},
"geoip": {
"description": "The `geoip` processor adds information about the geographical location of an IPv4 or IPv6 address."
},
"grok": {
"description": "Extracts structured fields out of a single text field within a document. You choose which field to extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular expression that supports aliased expressions that can be reused."
},
"gsub": {
"description": "Converts a string field by applying a regular expression and a replacement. If the field is an array of string, all members of the array will be converted. If any non-string values are encountered, the processor will throw an exception."
},
"html_strip": {
"description": "Removes HTML tags from the field. If the field is an array of strings, HTML tags will be removed from all members of the array."
},
"inference": {
"description": "Uses a pre-trained data frame analytics model or a model deployed for natural language processing tasks to infer against the data that is being ingested in the pipeline."
},
"join": {
"description": "Joins each element of an array into a single string using a separator character between each element. Throws an error when the field is not an array."
},
"json": {
"description": "Parses a string containing JSON data into a structured object, string, or other value."
},
"kv": {
"description": "This processor helps automatically parse messages (or specific event fields) which are of the `foo=bar` variety."
},
"lowercase": {
"description": "Converts a string to its lowercase equivalent. If the field is an array of strings, all members of the array will be converted."
},
"network_direction": {
"description": "Calculates the network direction given a source IP address, destination IP address, and a list of internal networks."
},
"pipeline": {
"description": "Executes another pipeline."
},
"redact": {
"description": "The Redact processor uses the Grok rules engine to obscure text in the input document matching the given Grok patterns. The processor can be used to obscure Personal Identifying Information (PII) by configuring it to detect known patterns such as email or IP addresses. Text that matches a Grok pattern is replaced with a configurable string such as `<EMAIL>` where an email address is matched or simply replace all matches with the text `<REDACTED>` if preferred."
},
"registered_domain": {
"description": "Extracts the registered domain (also known as the effective top-level domain or eTLD), sub-domain, and top-level domain from a fully qualified domain name (FQDN). Uses the registered domains defined in the Mozilla Public Suffix List."
},
"remove": {
"description": "Removes existing fields. If one field doesn’t exist, an exception will be thrown."
},
"rename": {
"description": "Renames an existing field. If the field doesn’t exist or the new name is already used, an exception will be thrown."
},
"reroute": {
"description": "Routes a document to another target index or data stream. When setting the `destination` option, the target is explicitly specified and the dataset and namespace options can’t be set. When the `destination` option is not set, this processor is in a data stream mode. Note that in this mode, the reroute processor can only be used on data streams that follow the data stream naming scheme."
},
"script": {
"description": "Runs an inline or stored script on incoming documents. The script runs in the `ingest` context."
},
"set": {
"description": "Adds a field with the specified value. If the field already exists, its value will be replaced with the provided one."
},
"set_security_user": {
"description": "Sets user-related details (such as `username`, `roles`, `email`, `full_name`, `metadata`, `api_key`, `realm` and `authentication_type`) from the current authenticated user to the current document by pre-processing the ingest."
},
"sort": {
"description": "Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted numerically, while arrays of strings or heterogeneous arrays of strings + numbers will be sorted lexicographically. Throws an error when the field is not an array."
},
"split": {
"description": "Splits a field into an array using a separator character. Only works on string fields."
},
"terminate": {
"description": "Terminates the current ingest pipeline, causing no further processors to be run. This will normally be executed conditionally, using the `if` option."
},
"trim": {
"description": "Trims whitespace from a field. If the field is an array of strings, all members of the array will be trimmed. This only works on leading and trailing whitespace."
},
"uppercase": {
"description": "Converts a string to its uppercase equivalent. If the field is an array of strings, all members of the array will be converted."
},
"urldecode": {
"description": "URL-decodes a string. If the field is an array of strings, all members of the array will be decoded."
},
"uri_parts": {
"description": "Parses a Uniform Resource Identifier (URI) string and extracts its components as an object. This URI object includes properties for the URI’s domain, path, fragment, port, query, scheme, user info, username, and password."
},
"user_agent": {
"description": "The `user_agent` processor extracts details from the user agent string a browser sends with its web requests. This processor adds this information by default under the `user_agent` field."
}
}
},
"description": "Processors to run immediately after a processor failure. Each processor supports a processor-level `on_failure` value. If a processor without an `on_failure` value fails, Elasticsearch uses this pipeline-level parameter as a fallback. The processors in this parameter run sequentially in the order specified. Elasticsearch will not attempt to run the pipeline's remaining processors."
},
"processors": {
"type": "array",
"items": {
"type": "object",
"properties": {
"append": {
"description": "Appends one or more values to an existing array if the field already exists and it is an array. Converts a scalar to an array and appends one or more values to it if the field exists and it is a scalar. Creates an array containing the provided values if the field doesn’t exist. Accepts a single value or an array of values."
},
"attachment": {
"description": "The attachment processor lets Elasticsearch extract file attachments in common formats (such as PPT, XLS, and PDF) by using the Apache text extraction library Tika."
},
"bytes": {
"description": "Converts a human readable byte value (for example `1kb`) to its value in bytes (for example `1024`). If the field is an array of strings, all members of the array will be converted. Supported human readable units are \"b\", \"kb\", \"mb\", \"gb\", \"tb\", \"pb\" case insensitive. An error will occur if the field is not a supported format or resultant value exceeds 2^63."
},
"cef": {
"description": "Converts a CEF message into a structured format."
},
"circle": {
"description": "Converts circle definitions of shapes to regular polygons which approximate them."
},
"community_id": {
"description": "Computes the Community ID for network flow data as defined in the Community ID Specification. You can use a community ID to correlate network events related to a single flow."
},
"convert": {
"description": "Converts a field in the currently ingested document to a different type, such as converting a string to an integer. If the field value is an array, all members will be converted."
},
"csv": {
"description": "Extracts fields from CSV line out of a single text field within a document. Any empty field in CSV will be skipped."
},
"date": {
"description": "Parses dates from fields, and then uses the date or timestamp as the timestamp for the document."
},
"date_index_name": {
"description": "The purpose of this processor is to point documents to the right time based index based on a date or timestamp field in a document by using the date math index name support."
},
"dissect": {
"description": "Extracts structured fields out of a single text field by matching the text field against a delimiter-based pattern."
},
"dot_expander": {
"description": "Expands a field with dots into an object field. This processor allows fields with dots in the name to be accessible by other processors in the pipeline. Otherwise these fields can’t be accessed by any processor."
},
"drop": {
"description": "Drops the document without raising any errors. This is useful to prevent the document from getting indexed based on some condition."
},
"enrich": {
"description": "The `enrich` processor can enrich documents with data from another index."
},
"fail": {
"description": "Raises an exception. This is useful for when you expect a pipeline to fail and want to relay a specific message to the requester."
},
"fingerprint": {
"description": "Computes a hash of the document’s content. You can use this hash for content fingerprinting."
},
"foreach": {
"description": "Runs an ingest processor on each element of an array or object."
},
"ip_location": {
"description": "Currently an undocumented alias for GeoIP Processor."
},
"geo_grid": {
"description": "Converts geo-grid definitions of grid tiles or cells to regular bounding boxes or polygons which describe their shape. This is useful if there is a need to interact with the tile shapes as spatially indexable fields."
},
"geoip": {
"description": "The `geoip` processor adds information about the geographical location of an IPv4 or IPv6 address."
},
"grok": {
"description": "Extracts structured fields out of a single text field within a document. You choose which field to extract matched fields from, as well as the grok pattern you expect will match. A grok pattern is like a regular expression that supports aliased expressions that can be reused."
},
"gsub": {
"description": "Converts a string field by applying a regular expression and a replacement. If the field is an array of string, all members of the array will be converted. If any non-string values are encountered, the processor will throw an exception."
},
"html_strip": {
"description": "Removes HTML tags from the field. If the field is an array of strings, HTML tags will be removed from all members of the array."
},
"inference": {
"description": "Uses a pre-trained data frame analytics model or a model deployed for natural language processing tasks to infer against the data that is being ingested in the pipeline."
},
"join": {
"description": "Joins each element of an array into a single string using a separator character between each element. Throws an error when the field is not an array."
},
"json": {
"description": "Parses a string containing JSON data into a structured object, string, or other value."
},
"kv": {
"description": "This processor helps automatically parse messages (or specific event fields) which are of the `foo=bar` variety."
},
"lowercase": {
"description": "Converts a string to its lowercase equivalent. If the field is an array of strings, all members of the array will be converted."
},
"network_direction": {
"description": "Calculates the network direction given a source IP address, destination IP address, and a list of internal networks."
},
"pipeline": {
"description": "Executes another pipeline."
},
"redact": {
"description": "The Redact processor uses the Grok rules engine to obscure text in the input document matching the given Grok patterns. The processor can be used to obscure Personal Identifying Information (PII) by configuring it to detect known patterns such as email or IP addresses. Text that matches a Grok pattern is replaced with a configurable string such as `<EMAIL>` where an email address is matched or simply replace all matches with the text `<REDACTED>` if preferred."
},
"registered_domain": {
"description": "Extracts the registered domain (also known as the effective top-level domain or eTLD), sub-domain, and top-level domain from a fully qualified domain name (FQDN). Uses the registered domains defined in the Mozilla Public Suffix List."
},
"remove": {
"description": "Removes existing fields. If one field doesn’t exist, an exception will be thrown."
},
"rename": {
"description": "Renames an existing field. If the field doesn’t exist or the new name is already used, an exception will be thrown."
},
"reroute": {
"description": "Routes a document to another target index or data stream. When setting the `destination` option, the target is explicitly specified and the dataset and namespace options can’t be set. When the `destination` option is not set, this processor is in a data stream mode. Note that in this mode, the reroute processor can only be used on data streams that follow the data stream naming scheme."
},
"script": {
"description": "Runs an inline or stored script on incoming documents. The script runs in the `ingest` context."
},
"set": {
"description": "Adds a field with the specified value. If the field already exists, its value will be replaced with the provided one."
},
"set_security_user": {
"description": "Sets user-related details (such as `username`, `roles`, `email`, `full_name`, `metadata`, `api_key`, `realm` and `authentication_type`) from the current authenticated user to the current document by pre-processing the ingest."
},
"sort": {
"description": "Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted numerically, while arrays of strings or heterogeneous arrays of strings + numbers will be sorted lexicographically. Throws an error when the field is not an array."
},
"split": {
"description": "Splits a field into an array using a separator character. Only works on string fields."
},
"terminate": {
"description": "Terminates the current ingest pipeline, causing no further processors to be run. This will normally be executed conditionally, using the `if` option."
},
"trim": {
"description": "Trims whitespace from a field. If the field is an array of strings, all members of the array will be trimmed. This only works on leading and trailing whitespace."
},
"uppercase": {
"description": "Converts a string to its uppercase equivalent. If the field is an array of strings, all members of the array will be converted."
},
"urldecode": {
"description": "URL-decodes a string. If the field is an array of strings, all members of the array will be decoded."
},
"uri_parts": {
"description": "Parses a Uniform Resource Identifier (URI) string and extracts its components as an object. This URI object includes properties for the URI’s domain, path, fragment, port, query, scheme, user info, username, and password."
},
"user_agent": {
"description": "The `user_agent` processor extracts details from the user agent string a browser sends with its web requests. This processor adds this information by default under the `user_agent` field."
}
}
},
"description": "Processors used to perform transformations on documents before indexing. Processors run sequentially in the order specified."
},
"version": {
"description": "Version number used by external systems to track ingest pipelines. This parameter is intended for external systems only. Elasticsearch does not use or validate pipeline version numbers."
}
},
"required": [
"PCID",
"id"
]
}