# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# Code generated by Microsoft (R) AutoRest Code Generator.
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
# --------------------------------------------------------------------------
from enum import Enum, EnumMeta
from six import with_metaclass

class _CaseInsensitiveEnumMeta(EnumMeta):
    def __getitem__(self, name):
        return super().__getitem__(name.upper())

    def __getattr__(cls, name):
        """Return the enum member matching `name`.

        We use __getattr__ instead of descriptors or inserting into the enum
        class' __dict__ in order to support `name` and `value` being both
        properties for enum members (which live in the class' __dict__) and
        enum members themselves.
        """
        try:
            return cls._member_map_[name.upper()]
        except KeyError:
            raise AttributeError(name)
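# Usage sketch (illustrative, not part of the generated module): the metaclass makes
# member lookup by name case-insensitive, while lookup by value stays case-sensitive,
# e.g. for any of the enums below:
#
#     >>> BlobIndexerDataToExtract["content_and_metadata"]
#     <BlobIndexerDataToExtract.CONTENT_AND_METADATA: 'contentAndMetadata'>
#     >>> BlobIndexerDataToExtract.content_and_metadata is BlobIndexerDataToExtract.CONTENT_AND_METADATA
#     True
#     >>> BlobIndexerDataToExtract("contentAndMetadata")  # by value, exact case required
#     <BlobIndexerDataToExtract.CONTENT_AND_METADATA: 'contentAndMetadata'>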
class BlobIndexerDataToExtract(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Specifies the data to extract from Azure blob storage and tells the indexer which data to
extract from image content when "imageAction" is set to a value other than "none". This
applies to embedded image content in a .PDF or other application, or image files such as .jpg
and .png, in Azure blobs.
"""
#: Indexes just the standard blob properties and user-specified metadata.
STORAGE_METADATA = "storageMetadata"
#: Extracts metadata provided by the Azure blob storage subsystem and the content-type-specific
#: metadata (for example, metadata unique to .png files is indexed).
ALL_METADATA = "allMetadata"
#: Extracts all metadata and textual content from each blob.
CONTENT_AND_METADATA = "contentAndMetadata"
class BlobIndexerImageAction(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Determines how to process embedded images and image files in Azure blob storage. Setting the
"imageAction" configuration to any value other than "none" requires that a skillset also be
attached to that indexer.
"""
#: Ignores embedded images or image files in the data set. This is the default.
NONE = "none"
#: Extracts text from images (for example, the word "STOP" from a traffic stop sign), and embeds
#: it into the content field. This action requires that "dataToExtract" be set to
#: "contentAndMetadata". A normalized image refers to additional processing resulting in uniform
#: image output, sized and rotated to promote consistent rendering when you include images in
#: visual search results. This information is generated for each image when you use this option.
GENERATE_NORMALIZED_IMAGES = "generateNormalizedImages"
#: Extracts text from images (for example, the word "STOP" from a traffic stop sign), and embeds
#: it into the content field, but treats PDF files differently in that each page will be rendered
#: as an image and normalized accordingly, instead of extracting embedded images. Non-PDF file
#: types will be treated the same as if "generateNormalizedImages" were set.
GENERATE_NORMALIZED_IMAGE_PER_PAGE = "generateNormalizedImagePerPage"
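# Configuration sketch (illustrative): because these enums subclass ``str``, their
# members can be placed directly into a blob indexer's parameter configuration, for
# example a REST-style dict. Per the docstrings above, a non-"none" imageAction also
# requires a skillset on the indexer, and normalized-image generation expects
# "dataToExtract" to be "contentAndMetadata":
#
#     configuration = {
#         "dataToExtract": BlobIndexerDataToExtract.CONTENT_AND_METADATA,
#         "imageAction": BlobIndexerImageAction.GENERATE_NORMALIZED_IMAGES,
#     }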
class BlobIndexerParsingMode(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Represents the parsing mode for indexing from an Azure blob data source.
"""
#: Set to default for normal file processing.
DEFAULT = "default"
#: Set to text to improve indexing performance on plain text files in blob storage.
TEXT = "text"
#: Set to delimitedText when blobs are plain CSV files.
DELIMITED_TEXT = "delimitedText"
#: Set to json to extract structured content from JSON files.
JSON = "json"
#: Set to jsonArray to extract individual elements of a JSON array as separate documents in Azure
#: Cognitive Search.
JSON_ARRAY = "jsonArray"
#: Set to jsonLines to extract individual JSON entities, separated by a new line, as separate
#: documents in Azure Cognitive Search.
JSON_LINES = "jsonLines"
class BlobIndexerPDFTextRotationAlgorithm(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Determines algorithm for text extraction from PDF files in Azure blob storage.
"""
#: Leverages normal text extraction. This is the default.
NONE = "none"
#: May produce better and more readable text extraction from PDF files that have rotated text
#: within them. Note that there may be a small performance impact when this parameter is
#: used. This parameter only applies to PDF files, and only to PDFs with embedded text. If the
#: rotated text appears within an embedded image in the PDF, this parameter does not apply.
DETECT_ANGLES = "detectAngles"
class CharFilterName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the names of all character filters supported by Azure Cognitive Search.
"""
#: A character filter that attempts to strip out HTML constructs. See
#: https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.html.
HTML_STRIP = "html_strip"
class CjkBigramTokenFilterScripts(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Scripts that can be ignored by CjkBigramTokenFilter.
"""
#: Ignore Han script when forming bigrams of CJK terms.
HAN = "han"
#: Ignore Hiragana script when forming bigrams of CJK terms.
HIRAGANA = "hiragana"
#: Ignore Katakana script when forming bigrams of CJK terms.
KATAKANA = "katakana"
#: Ignore Hangul script when forming bigrams of CJK terms.
HANGUL = "hangul"
class CustomEntityLookupSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input text by CustomEntityLookupSkill.
"""
#: Danish.
DA = "da"
#: German.
DE = "de"
#: English.
EN = "en"
#: Spanish.
ES = "es"
#: Finnish.
FI = "fi"
#: French.
FR = "fr"
#: Italian.
IT = "it"
#: Korean.
KO = "ko"
#: Portuguese.
PT = "pt"
class EdgeNGramTokenFilterSide(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Specifies which side of the input an n-gram should be generated from.
"""
#: Specifies that the n-gram should be generated from the front of the input.
FRONT = "front"
#: Specifies that the n-gram should be generated from the back of the input.
BACK = "back"
class EntityCategory(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""A string indicating what entity categories to return.
"""
#: Entities describing a physical location.
LOCATION = "location"
#: Entities describing an organization.
ORGANIZATION = "organization"
#: Entities describing a person.
PERSON = "person"
#: Entities describing a quantity.
QUANTITY = "quantity"
#: Entities describing a date and time.
DATETIME = "datetime"
#: Entities describing a URL.
URL = "url"
#: Entities describing an email address.
EMAIL = "email"
class EntityRecognitionSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input text by EntityRecognitionSkill.
"""
#: Arabic.
AR = "ar"
#: Czech.
CS = "cs"
#: Chinese-Simplified.
ZH_HANS = "zh-Hans"
#: Chinese-Traditional.
ZH_HANT = "zh-Hant"
#: Danish.
DA = "da"
#: Dutch.
NL = "nl"
#: English.
EN = "en"
#: Finnish.
FI = "fi"
#: French.
FR = "fr"
#: German.
DE = "de"
#: Greek.
EL = "el"
#: Hungarian.
HU = "hu"
#: Italian.
IT = "it"
#: Japanese.
JA = "ja"
#: Korean.
KO = "ko"
#: Norwegian (Bokmaal).
NO = "no"
#: Polish.
PL = "pl"
#: Portuguese (Portugal).
PT_PT = "pt-PT"
#: Portuguese (Brazil).
PT_BR = "pt-BR"
#: Russian.
RU = "ru"
#: Spanish.
ES = "es"
#: Swedish.
SV = "sv"
#: Turkish.
TR = "tr"
class ImageAnalysisSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input by ImageAnalysisSkill.
"""
#: English.
EN = "en"
#: Spanish.
ES = "es"
#: Japanese.
JA = "ja"
#: Portuguese.
PT = "pt"
#: Chinese.
ZH = "zh"
class ImageDetail(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""A string indicating which domain-specific details to return.
"""
#: Details recognized as celebrities.
CELEBRITIES = "celebrities"
#: Details recognized as landmarks.
LANDMARKS = "landmarks"
class IndexerExecutionEnvironment(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Specifies the environment in which the indexer should execute.
"""
#: Indicates that Azure Cognitive Search can determine where the indexer should execute. This is
#: the default environment when nothing is specified and is the recommended value.
STANDARD = "standard"
#: Indicates that the indexer should run with the environment provisioned specifically for the
#: search service. This should only be specified as the execution environment if the indexer needs
#: to access resources securely over shared private link resources.
PRIVATE = "private"
class IndexerExecutionStatus(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Represents the status of an individual indexer execution.
"""
#: An indexer invocation has failed, but the failure may be transient. Indexer invocations will
#: continue per schedule.
TRANSIENT_FAILURE = "transientFailure"
#: Indexer execution completed successfully.
SUCCESS = "success"
#: Indexer execution is in progress.
IN_PROGRESS = "inProgress"
#: Indexer has been reset.
RESET = "reset"
class IndexerStatus(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Represents the overall indexer status.
"""
#: Indicates that the indexer is in an unknown state.
UNKNOWN = "unknown"
#: Indicates that the indexer experienced an error that cannot be corrected without human
#: intervention.
ERROR = "error"
#: Indicates that the indexer is running normally.
RUNNING = "running"
class LexicalAnalyzerName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the names of all text analyzers supported by Azure Cognitive Search.
"""
#: Microsoft analyzer for Arabic.
AR_MICROSOFT = "ar.microsoft"
#: Lucene analyzer for Arabic.
AR_LUCENE = "ar.lucene"
#: Lucene analyzer for Armenian.
HY_LUCENE = "hy.lucene"
#: Microsoft analyzer for Bangla.
BN_MICROSOFT = "bn.microsoft"
#: Lucene analyzer for Basque.
EU_LUCENE = "eu.lucene"
#: Microsoft analyzer for Bulgarian.
BG_MICROSOFT = "bg.microsoft"
#: Lucene analyzer for Bulgarian.
BG_LUCENE = "bg.lucene"
#: Microsoft analyzer for Catalan.
CA_MICROSOFT = "ca.microsoft"
#: Lucene analyzer for Catalan.
CA_LUCENE = "ca.lucene"
#: Microsoft analyzer for Chinese (Simplified).
ZH_HANS_MICROSOFT = "zh-Hans.microsoft"
#: Lucene analyzer for Chinese (Simplified).
ZH_HANS_LUCENE = "zh-Hans.lucene"
#: Microsoft analyzer for Chinese (Traditional).
ZH_HANT_MICROSOFT = "zh-Hant.microsoft"
#: Lucene analyzer for Chinese (Traditional).
ZH_HANT_LUCENE = "zh-Hant.lucene"
#: Microsoft analyzer for Croatian.
HR_MICROSOFT = "hr.microsoft"
#: Microsoft analyzer for Czech.
CS_MICROSOFT = "cs.microsoft"
#: Lucene analyzer for Czech.
CS_LUCENE = "cs.lucene"
#: Microsoft analyzer for Danish.
DA_MICROSOFT = "da.microsoft"
#: Lucene analyzer for Danish.
DA_LUCENE = "da.lucene"
#: Microsoft analyzer for Dutch.
NL_MICROSOFT = "nl.microsoft"
#: Lucene analyzer for Dutch.
NL_LUCENE = "nl.lucene"
#: Microsoft analyzer for English.
EN_MICROSOFT = "en.microsoft"
#: Lucene analyzer for English.
EN_LUCENE = "en.lucene"
#: Microsoft analyzer for Estonian.
ET_MICROSOFT = "et.microsoft"
#: Microsoft analyzer for Finnish.
FI_MICROSOFT = "fi.microsoft"
#: Lucene analyzer for Finnish.
FI_LUCENE = "fi.lucene"
#: Microsoft analyzer for French.
FR_MICROSOFT = "fr.microsoft"
#: Lucene analyzer for French.
FR_LUCENE = "fr.lucene"
#: Lucene analyzer for Galician.
GL_LUCENE = "gl.lucene"
#: Microsoft analyzer for German.
DE_MICROSOFT = "de.microsoft"
#: Lucene analyzer for German.
DE_LUCENE = "de.lucene"
#: Microsoft analyzer for Greek.
EL_MICROSOFT = "el.microsoft"
#: Lucene analyzer for Greek.
EL_LUCENE = "el.lucene"
#: Microsoft analyzer for Gujarati.
GU_MICROSOFT = "gu.microsoft"
#: Microsoft analyzer for Hebrew.
HE_MICROSOFT = "he.microsoft"
#: Microsoft analyzer for Hindi.
HI_MICROSOFT = "hi.microsoft"
#: Lucene analyzer for Hindi.
HI_LUCENE = "hi.lucene"
#: Microsoft analyzer for Hungarian.
HU_MICROSOFT = "hu.microsoft"
#: Lucene analyzer for Hungarian.
HU_LUCENE = "hu.lucene"
#: Microsoft analyzer for Icelandic.
IS_MICROSOFT = "is.microsoft"
#: Microsoft analyzer for Indonesian (Bahasa).
ID_MICROSOFT = "id.microsoft"
#: Lucene analyzer for Indonesian.
ID_LUCENE = "id.lucene"
#: Lucene analyzer for Irish.
GA_LUCENE = "ga.lucene"
#: Microsoft analyzer for Italian.
IT_MICROSOFT = "it.microsoft"
#: Lucene analyzer for Italian.
IT_LUCENE = "it.lucene"
#: Microsoft analyzer for Japanese.
JA_MICROSOFT = "ja.microsoft"
#: Lucene analyzer for Japanese.
JA_LUCENE = "ja.lucene"
#: Microsoft analyzer for Kannada.
KN_MICROSOFT = "kn.microsoft"
#: Microsoft analyzer for Korean.
KO_MICROSOFT = "ko.microsoft"
#: Lucene analyzer for Korean.
KO_LUCENE = "ko.lucene"
#: Microsoft analyzer for Latvian.
LV_MICROSOFT = "lv.microsoft"
#: Lucene analyzer for Latvian.
LV_LUCENE = "lv.lucene"
#: Microsoft analyzer for Lithuanian.
LT_MICROSOFT = "lt.microsoft"
#: Microsoft analyzer for Malayalam.
ML_MICROSOFT = "ml.microsoft"
#: Microsoft analyzer for Malay (Latin).
MS_MICROSOFT = "ms.microsoft"
#: Microsoft analyzer for Marathi.
MR_MICROSOFT = "mr.microsoft"
#: Microsoft analyzer for Norwegian (Bokmål).
NB_MICROSOFT = "nb.microsoft"
#: Lucene analyzer for Norwegian.
NO_LUCENE = "no.lucene"
#: Lucene analyzer for Persian.
FA_LUCENE = "fa.lucene"
#: Microsoft analyzer for Polish.
PL_MICROSOFT = "pl.microsoft"
#: Lucene analyzer for Polish.
PL_LUCENE = "pl.lucene"
#: Microsoft analyzer for Portuguese (Brazil).
PT_BR_MICROSOFT = "pt-BR.microsoft"
#: Lucene analyzer for Portuguese (Brazil).
PT_BR_LUCENE = "pt-BR.lucene"
#: Microsoft analyzer for Portuguese (Portugal).
PT_PT_MICROSOFT = "pt-PT.microsoft"
#: Lucene analyzer for Portuguese (Portugal).
PT_PT_LUCENE = "pt-PT.lucene"
#: Microsoft analyzer for Punjabi.
PA_MICROSOFT = "pa.microsoft"
#: Microsoft analyzer for Romanian.
RO_MICROSOFT = "ro.microsoft"
#: Lucene analyzer for Romanian.
RO_LUCENE = "ro.lucene"
#: Microsoft analyzer for Russian.
RU_MICROSOFT = "ru.microsoft"
#: Lucene analyzer for Russian.
RU_LUCENE = "ru.lucene"
#: Microsoft analyzer for Serbian (Cyrillic).
SR_CYRILLIC_MICROSOFT = "sr-cyrillic.microsoft"
#: Microsoft analyzer for Serbian (Latin).
SR_LATIN_MICROSOFT = "sr-latin.microsoft"
#: Microsoft analyzer for Slovak.
SK_MICROSOFT = "sk.microsoft"
#: Microsoft analyzer for Slovenian.
SL_MICROSOFT = "sl.microsoft"
#: Microsoft analyzer for Spanish.
ES_MICROSOFT = "es.microsoft"
#: Lucene analyzer for Spanish.
ES_LUCENE = "es.lucene"
#: Microsoft analyzer for Swedish.
SV_MICROSOFT = "sv.microsoft"
#: Lucene analyzer for Swedish.
SV_LUCENE = "sv.lucene"
#: Microsoft analyzer for Tamil.
TA_MICROSOFT = "ta.microsoft"
#: Microsoft analyzer for Telugu.
TE_MICROSOFT = "te.microsoft"
#: Microsoft analyzer for Thai.
TH_MICROSOFT = "th.microsoft"
#: Lucene analyzer for Thai.
TH_LUCENE = "th.lucene"
#: Microsoft analyzer for Turkish.
TR_MICROSOFT = "tr.microsoft"
#: Lucene analyzer for Turkish.
TR_LUCENE = "tr.lucene"
#: Microsoft analyzer for Ukrainian.
UK_MICROSOFT = "uk.microsoft"
#: Microsoft analyzer for Urdu.
UR_MICROSOFT = "ur.microsoft"
#: Microsoft analyzer for Vietnamese.
VI_MICROSOFT = "vi.microsoft"
#: Standard Lucene analyzer.
STANDARD_LUCENE = "standard.lucene"
#: Standard ASCII Folding Lucene analyzer. See
#: https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#Analyzers.
STANDARD_ASCII_FOLDING_LUCENE = "standardasciifolding.lucene"
#: Treats the entire content of a field as a single token. This is useful for data like zip codes,
#: ids, and some product names. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordAnalyzer.html.
KEYWORD = "keyword"
#: Flexibly separates text into terms via a regular expression pattern. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.html.
PATTERN = "pattern"
#: Divides text at non-letters and converts them to lower case. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/SimpleAnalyzer.html.
SIMPLE = "simple"
#: Divides text at non-letters; applies the lowercase and stopword token filters. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopAnalyzer.html.
STOP = "stop"
#: An analyzer that uses the whitespace tokenizer. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceAnalyzer.html.
WHITESPACE = "whitespace"
class LexicalNormalizerName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the names of all text normalizers supported by Azure Cognitive Search.
"""
#: Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127
#: ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if such
#: equivalents exist. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html.
ASCII_FOLDING = "asciifolding"
#: Removes elisions. For example, "l'avion" (the plane) will be converted to "avion" (plane). See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html.
ELISION = "elision"
#: Normalizes token text to lowercase. See
#: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html.
LOWERCASE = "lowercase"
#: Standard normalizer, which consists of lowercase and asciifolding.
STANDARD = "standard"
#: Normalizes token text to uppercase. See
#: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html.
UPPERCASE = "uppercase"
class LexicalTokenizerName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the names of all tokenizers supported by Azure Cognitive Search.
"""
#: Grammar-based tokenizer that is suitable for processing most European-language documents. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicTokenizer.html.
CLASSIC = "classic"
#: Tokenizes the input from an edge into n-grams of the given size(s). See
#: https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.html.
EDGE_N_GRAM = "edgeNGram"
#: Emits the entire input as a single token. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordTokenizer.html.
KEYWORD = "keyword_v2"
#: Divides text at non-letters. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LetterTokenizer.html.
LETTER = "letter"
#: Divides text at non-letters and converts them to lower case. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseTokenizer.html.
LOWERCASE = "lowercase"
#: Divides text using language-specific rules.
MICROSOFT_LANGUAGE_TOKENIZER = "microsoft_language_tokenizer"
#: Divides text using language-specific rules and reduces words to their base forms.
MICROSOFT_LANGUAGE_STEMMING_TOKENIZER = "microsoft_language_stemming_tokenizer"
#: Tokenizes the input into n-grams of the given size(s). See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenizer.html.
N_GRAM = "nGram"
#: Tokenizer for path-like hierarchies. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.html.
PATH_HIERARCHY = "path_hierarchy_v2"
#: Tokenizer that uses regex pattern matching to construct distinct tokens. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternTokenizer.html.
PATTERN = "pattern"
#: Standard Lucene tokenizer; breaks text following the Unicode Text Segmentation rules. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/StandardTokenizer.html.
STANDARD = "standard_v2"
#: Tokenizes URLs and emails as one token. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.html.
UAX_URL_EMAIL = "uax_url_email"
#: Divides text at whitespace. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceTokenizer.html.
WHITESPACE = "whitespace"
class LineEnding(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the sequence of characters to use between the lines of text recognized by the OCR
skill. The default value is "space".
"""
#: Lines are separated by a single space character.
SPACE = "space"
#: Lines are separated by a carriage return ('\r') character.
CARRIAGE_RETURN = "carriageReturn"
#: Lines are separated by a single line feed ('\n') character.
LINE_FEED = "lineFeed"
#: Lines are separated by a carriage return followed by a line feed ('\r\n').
CARRIAGE_RETURN_LINE_FEED = "carriageReturnLineFeed"
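# Usage sketch (illustrative): the OCR skill reads this value from its "lineEnding"
# parameter; "space" is the default, per the docstring above:
#
#     ocr_configuration = {"lineEnding": LineEnding.CARRIAGE_RETURN_LINE_FEED}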
class MicrosoftStemmingTokenizerLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Lists the languages supported by the Microsoft language stemming tokenizer.
"""
#: Selects the Microsoft stemming tokenizer for Arabic.
ARABIC = "arabic"
#: Selects the Microsoft stemming tokenizer for Bangla.
BANGLA = "bangla"
#: Selects the Microsoft stemming tokenizer for Bulgarian.
BULGARIAN = "bulgarian"
#: Selects the Microsoft stemming tokenizer for Catalan.
CATALAN = "catalan"
#: Selects the Microsoft stemming tokenizer for Croatian.
CROATIAN = "croatian"
#: Selects the Microsoft stemming tokenizer for Czech.
CZECH = "czech"
#: Selects the Microsoft stemming tokenizer for Danish.
DANISH = "danish"
#: Selects the Microsoft stemming tokenizer for Dutch.
DUTCH = "dutch"
#: Selects the Microsoft stemming tokenizer for English.
ENGLISH = "english"
#: Selects the Microsoft stemming tokenizer for Estonian.
ESTONIAN = "estonian"
#: Selects the Microsoft stemming tokenizer for Finnish.
FINNISH = "finnish"
#: Selects the Microsoft stemming tokenizer for French.
FRENCH = "french"
#: Selects the Microsoft stemming tokenizer for German.
GERMAN = "german"
#: Selects the Microsoft stemming tokenizer for Greek.
GREEK = "greek"
#: Selects the Microsoft stemming tokenizer for Gujarati.
GUJARATI = "gujarati"
#: Selects the Microsoft stemming tokenizer for Hebrew.
HEBREW = "hebrew"
#: Selects the Microsoft stemming tokenizer for Hindi.
HINDI = "hindi"
#: Selects the Microsoft stemming tokenizer for Hungarian.
HUNGARIAN = "hungarian"
#: Selects the Microsoft stemming tokenizer for Icelandic.
ICELANDIC = "icelandic"
#: Selects the Microsoft stemming tokenizer for Indonesian.
INDONESIAN = "indonesian"
#: Selects the Microsoft stemming tokenizer for Italian.
ITALIAN = "italian"
#: Selects the Microsoft stemming tokenizer for Kannada.
KANNADA = "kannada"
#: Selects the Microsoft stemming tokenizer for Latvian.
LATVIAN = "latvian"
#: Selects the Microsoft stemming tokenizer for Lithuanian.
LITHUANIAN = "lithuanian"
#: Selects the Microsoft stemming tokenizer for Malay.
MALAY = "malay"
#: Selects the Microsoft stemming tokenizer for Malayalam.
MALAYALAM = "malayalam"
#: Selects the Microsoft stemming tokenizer for Marathi.
MARATHI = "marathi"
#: Selects the Microsoft stemming tokenizer for Norwegian (Bokmål).
NORWEGIAN_BOKMAAL = "norwegianBokmaal"
#: Selects the Microsoft stemming tokenizer for Polish.
POLISH = "polish"
#: Selects the Microsoft stemming tokenizer for Portuguese.
PORTUGUESE = "portuguese"
#: Selects the Microsoft stemming tokenizer for Portuguese (Brazil).
PORTUGUESE_BRAZILIAN = "portugueseBrazilian"
#: Selects the Microsoft stemming tokenizer for Punjabi.
PUNJABI = "punjabi"
#: Selects the Microsoft stemming tokenizer for Romanian.
ROMANIAN = "romanian"
#: Selects the Microsoft stemming tokenizer for Russian.
RUSSIAN = "russian"
#: Selects the Microsoft stemming tokenizer for Serbian (Cyrillic).
SERBIAN_CYRILLIC = "serbianCyrillic"
#: Selects the Microsoft stemming tokenizer for Serbian (Latin).
SERBIAN_LATIN = "serbianLatin"
#: Selects the Microsoft stemming tokenizer for Slovak.
SLOVAK = "slovak"
#: Selects the Microsoft stemming tokenizer for Slovenian.
SLOVENIAN = "slovenian"
#: Selects the Microsoft stemming tokenizer for Spanish.
SPANISH = "spanish"
#: Selects the Microsoft stemming tokenizer for Swedish.
SWEDISH = "swedish"
#: Selects the Microsoft stemming tokenizer for Tamil.
TAMIL = "tamil"
#: Selects the Microsoft stemming tokenizer for Telugu.
TELUGU = "telugu"
#: Selects the Microsoft stemming tokenizer for Turkish.
TURKISH = "turkish"
#: Selects the Microsoft stemming tokenizer for Ukrainian.
UKRAINIAN = "ukrainian"
#: Selects the Microsoft stemming tokenizer for Urdu.
URDU = "urdu"
class MicrosoftTokenizerLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Lists the languages supported by the Microsoft language tokenizer.
"""
#: Selects the Microsoft tokenizer for Bangla.
BANGLA = "bangla"
#: Selects the Microsoft tokenizer for Bulgarian.
BULGARIAN = "bulgarian"
#: Selects the Microsoft tokenizer for Catalan.
CATALAN = "catalan"
#: Selects the Microsoft tokenizer for Chinese (Simplified).
CHINESE_SIMPLIFIED = "chineseSimplified"
#: Selects the Microsoft tokenizer for Chinese (Traditional).
CHINESE_TRADITIONAL = "chineseTraditional"
#: Selects the Microsoft tokenizer for Croatian.
CROATIAN = "croatian"
#: Selects the Microsoft tokenizer for Czech.
CZECH = "czech"
#: Selects the Microsoft tokenizer for Danish.
DANISH = "danish"
#: Selects the Microsoft tokenizer for Dutch.
DUTCH = "dutch"
#: Selects the Microsoft tokenizer for English.
ENGLISH = "english"
#: Selects the Microsoft tokenizer for French.
FRENCH = "french"
#: Selects the Microsoft tokenizer for German.
GERMAN = "german"
#: Selects the Microsoft tokenizer for Greek.
GREEK = "greek"
#: Selects the Microsoft tokenizer for Gujarati.
GUJARATI = "gujarati"
#: Selects the Microsoft tokenizer for Hindi.
HINDI = "hindi"
#: Selects the Microsoft tokenizer for Icelandic.
ICELANDIC = "icelandic"
#: Selects the Microsoft tokenizer for Indonesian.
INDONESIAN = "indonesian"
#: Selects the Microsoft tokenizer for Italian.
ITALIAN = "italian"
#: Selects the Microsoft tokenizer for Japanese.
JAPANESE = "japanese"
#: Selects the Microsoft tokenizer for Kannada.
KANNADA = "kannada"
#: Selects the Microsoft tokenizer for Korean.
KOREAN = "korean"
#: Selects the Microsoft tokenizer for Malay.
MALAY = "malay"
#: Selects the Microsoft tokenizer for Malayalam.
MALAYALAM = "malayalam"
#: Selects the Microsoft tokenizer for Marathi.
MARATHI = "marathi"
#: Selects the Microsoft tokenizer for Norwegian (Bokmål).
NORWEGIAN_BOKMAAL = "norwegianBokmaal"
#: Selects the Microsoft tokenizer for Polish.
POLISH = "polish"
#: Selects the Microsoft tokenizer for Portuguese.
PORTUGUESE = "portuguese"
#: Selects the Microsoft tokenizer for Portuguese (Brazil).
PORTUGUESE_BRAZILIAN = "portugueseBrazilian"
#: Selects the Microsoft tokenizer for Punjabi.
PUNJABI = "punjabi"
#: Selects the Microsoft tokenizer for Romanian.
ROMANIAN = "romanian"
#: Selects the Microsoft tokenizer for Russian.
RUSSIAN = "russian"
#: Selects the Microsoft tokenizer for Serbian (Cyrillic).
SERBIAN_CYRILLIC = "serbianCyrillic"
#: Selects the Microsoft tokenizer for Serbian (Latin).
SERBIAN_LATIN = "serbianLatin"
#: Selects the Microsoft tokenizer for Slovenian.
SLOVENIAN = "slovenian"
#: Selects the Microsoft tokenizer for Spanish.
SPANISH = "spanish"
#: Selects the Microsoft tokenizer for Swedish.
SWEDISH = "swedish"
#: Selects the Microsoft tokenizer for Tamil.
TAMIL = "tamil"
#: Selects the Microsoft tokenizer for Telugu.
TELUGU = "telugu"
#: Selects the Microsoft tokenizer for Thai.
THAI = "thai"
#: Selects the Microsoft tokenizer for Ukrainian.
UKRAINIAN = "ukrainian"
#: Selects the Microsoft tokenizer for Urdu.
URDU = "urdu"
#: Selects the Microsoft tokenizer for Vietnamese.
VIETNAMESE = "vietnamese"
class OcrSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input by OcrSkill.
"""
#: Chinese-Simplified.
ZH_HANS = "zh-Hans"
#: Chinese-Traditional.
ZH_HANT = "zh-Hant"
#: Czech.
CS = "cs"
#: Danish.
DA = "da"
#: Dutch.
NL = "nl"
#: English.
EN = "en"
#: Finnish.
FI = "fi"
#: French.
FR = "fr"
#: German.
DE = "de"
#: Greek.
EL = "el"
#: Hungarian.
HU = "hu"
#: Italian.
IT = "it"
#: Japanese.
JA = "ja"
#: Korean.
KO = "ko"
#: Norwegian (Bokmaal).
NB = "nb"
#: Polish.
PL = "pl"
#: Portuguese.
PT = "pt"
#: Russian.
RU = "ru"
#: Spanish.
ES = "es"
#: Swedish.
SV = "sv"
#: Turkish.
TR = "tr"
#: Arabic.
AR = "ar"
#: Romanian.
RO = "ro"
#: Serbian (Cyrillic, Serbia).
SR_CYRL = "sr-Cyrl"
#: Serbian (Latin, Serbia).
SR_LATN = "sr-Latn"
#: Slovak.
SK = "sk"
class PhoneticEncoder(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Identifies the type of phonetic encoder to use with a PhoneticTokenFilter.
"""
#: Encodes a token into a Metaphone value.
METAPHONE = "metaphone"
#: Encodes a token into a double metaphone value.
DOUBLE_METAPHONE = "doubleMetaphone"
#: Encodes a token into a Soundex value.
SOUNDEX = "soundex"
#: Encodes a token into a Refined Soundex value.
REFINED_SOUNDEX = "refinedSoundex"
#: Encodes a token into a Caverphone 1.0 value.
CAVERPHONE1 = "caverphone1"
#: Encodes a token into a Caverphone 2.0 value.
CAVERPHONE2 = "caverphone2"
#: Encodes a token into a Cologne Phonetic value.
COLOGNE = "cologne"
#: Encodes a token into a NYSIIS value.
NYSIIS = "nysiis"
#: Encodes a token using the Kölner Phonetik algorithm.
KOELNER_PHONETIK = "koelnerPhonetik"
#: Encodes a token using the Haase refinement of the Kölner Phonetik algorithm.
HAASE_PHONETIK = "haasePhonetik"
#: Encodes a token into a Beider-Morse value.
BEIDER_MORSE = "beiderMorse"
class PIIDetectionSkillMaskingMode(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""A string indicating what maskingMode to use to mask the personal information detected in the
input text.
"""
#: No masking occurs and the maskedText output will not be returned.
NONE = "none"
#: Replaces the detected entities with the character given in the maskingCharacter parameter. The
#: character will be repeated to the length of the detected entity so that the offsets will
#: correctly correspond to both the input text as well as the output maskedText.
REPLACE = "replace"
class RegexFlags(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines flags that can be combined to control how regular expressions are used in the pattern
analyzer and pattern tokenizer.
"""
#: Enables canonical equivalence.
CANON_EQ = "CANON_EQ"
#: Enables case-insensitive matching.
CASE_INSENSITIVE = "CASE_INSENSITIVE"
#: Permits whitespace and comments in the pattern.
COMMENTS = "COMMENTS"
#: Enables dotall mode.
DOT_ALL = "DOTALL"
#: Enables literal parsing of the pattern.
LITERAL = "LITERAL"
#: Enables multiline mode.
MULTILINE = "MULTILINE"
#: Enables Unicode-aware case folding.
UNICODE_CASE = "UNICODE_CASE"
#: Enables Unix lines mode.
UNIX_LINES = "UNIX_LINES"
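# Combination sketch (illustrative): the members are plain strings named after Java's
# Pattern flags, so several can be pipe-delimited into the single ``flags`` value that
# a pattern analyzer or pattern tokenizer definition accepts:
#
#     flags = "|".join([RegexFlags.CASE_INSENSITIVE, RegexFlags.MULTILINE])
#     # -> "CASE_INSENSITIVE|MULTILINE"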
class ScoringFunctionAggregation(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the aggregation function used to combine the results of all the scoring functions in a
scoring profile.
"""
#: Boost scores by the sum of all scoring function results.
SUM = "sum"
#: Boost scores by the average of all scoring function results.
AVERAGE = "average"
#: Boost scores by the minimum of all scoring function results.
MINIMUM = "minimum"
#: Boost scores by the maximum of all scoring function results.
MAXIMUM = "maximum"
#: Boost scores using the first applicable scoring function in the scoring profile.
FIRST_MATCHING = "firstMatching"
class ScoringFunctionInterpolation(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the function used to interpolate score boosting across a range of documents.
"""
#: Boosts scores by a linearly decreasing amount. This is the default interpolation for scoring
#: functions.
LINEAR = "linear"
#: Boosts scores by a constant factor.
CONSTANT = "constant"
#: Boosts scores by an amount that decreases quadratically. Boosts decrease slowly for higher
#: scores, and more quickly as the scores decrease. This interpolation option is not allowed in
#: tag scoring functions.
QUADRATIC = "quadratic"
#: Boosts scores by an amount that decreases logarithmically. Boosts decrease quickly for higher
#: scores, and more slowly as the scores decrease. This interpolation option is not allowed in tag
#: scoring functions.
LOGARITHMIC = "logarithmic"
class SearchFieldDataType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the data type of a field in a search index.
"""
#: Indicates that a field contains a string.
STRING = "Edm.String"
#: Indicates that a field contains a 32-bit signed integer.
INT32 = "Edm.Int32"
#: Indicates that a field contains a 64-bit signed integer.
INT64 = "Edm.Int64"
#: Indicates that a field contains an IEEE double-precision floating point number.
DOUBLE = "Edm.Double"
#: Indicates that a field contains a Boolean value (true or false).
BOOLEAN = "Edm.Boolean"
#: Indicates that a field contains a date/time value, including timezone information.
DATE_TIME_OFFSET = "Edm.DateTimeOffset"
#: Indicates that a field contains a geo-location in terms of longitude and latitude.
GEOGRAPHY_POINT = "Edm.GeographyPoint"
#: Indicates that a field contains one or more complex objects that in turn have sub-fields of
#: other types.
COMPLEX = "Edm.ComplexType"
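# Collection sketch (illustrative): the service expresses collection fields by wrapping
# an element type, e.g. "Collection(Edm.String)". Because members are plain strings,
# such a type string can be formed directly:
#
#     collection_of_strings = "Collection({})".format(SearchFieldDataType.STRING)
#     # -> "Collection(Edm.String)"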
class SearchIndexerDataSourceType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the type of a datasource.
"""
#: Indicates an Azure SQL datasource.
AZURE_SQL = "azuresql"
#: Indicates a CosmosDB datasource.
COSMOS_DB = "cosmosdb"
#: Indicates an Azure Blob datasource.
AZURE_BLOB = "azureblob"
#: Indicates an Azure Table datasource.
AZURE_TABLE = "azuretable"
#: Indicates a MySQL datasource.
MY_SQL = "mysql"
#: Indicates an ADLS Gen2 datasource.
ADLS_GEN2 = "adlsgen2"
class SentimentSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input text by SentimentSkill.
"""
#: Danish.
DA = "da"
#: Dutch.
NL = "nl"
#: English.
EN = "en"
#: Finnish.
FI = "fi"
#: French.
FR = "fr"
#: German.
DE = "de"
#: Greek.
EL = "el"
#: Italian.
IT = "it"
#: Norwegian (Bokmaal).
NO = "no"
#: Polish.
PL = "pl"
#: Portuguese (Portugal).
PT_PT = "pt-PT"
#: Russian.
RU = "ru"
#: Spanish.
ES = "es"
#: Swedish.
SV = "sv"
#: Turkish.
TR = "tr"
class SnowballTokenFilterLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language to use for a Snowball token filter.
"""
#: Selects the Lucene Snowball stemming tokenizer for Armenian.
ARMENIAN = "armenian"
#: Selects the Lucene Snowball stemming tokenizer for Basque.
BASQUE = "basque"
#: Selects the Lucene Snowball stemming tokenizer for Catalan.
CATALAN = "catalan"
#: Selects the Lucene Snowball stemming tokenizer for Danish.
DANISH = "danish"
#: Selects the Lucene Snowball stemming tokenizer for Dutch.
DUTCH = "dutch"
#: Selects the Lucene Snowball stemming tokenizer for English.
ENGLISH = "english"
#: Selects the Lucene Snowball stemming tokenizer for Finnish.
FINNISH = "finnish"
#: Selects the Lucene Snowball stemming tokenizer for French.
FRENCH = "french"
#: Selects the Lucene Snowball stemming tokenizer for German.
GERMAN = "german"
#: Selects the Lucene Snowball stemming tokenizer that uses the German variant algorithm.
GERMAN2 = "german2"
#: Selects the Lucene Snowball stemming tokenizer for Hungarian.
HUNGARIAN = "hungarian"
#: Selects the Lucene Snowball stemming tokenizer for Italian.
ITALIAN = "italian"
#: Selects the Lucene Snowball stemming tokenizer for Dutch that uses the Kraaij-Pohlmann stemming
#: algorithm.
KP = "kp"
#: Selects the Lucene Snowball stemming tokenizer for English that uses the Lovins stemming
#: algorithm.
LOVINS = "lovins"
#: Selects the Lucene Snowball stemming tokenizer for Norwegian.
NORWEGIAN = "norwegian"
#: Selects the Lucene Snowball stemming tokenizer for English that uses the Porter stemming
#: algorithm.
PORTER = "porter"
#: Selects the Lucene Snowball stemming tokenizer for Portuguese.
PORTUGUESE = "portuguese"
#: Selects the Lucene Snowball stemming tokenizer for Romanian.
ROMANIAN = "romanian"
#: Selects the Lucene Snowball stemming tokenizer for Russian.
RUSSIAN = "russian"
#: Selects the Lucene Snowball stemming tokenizer for Spanish.
SPANISH = "spanish"
#: Selects the Lucene Snowball stemming tokenizer for Swedish.
SWEDISH = "swedish"
#: Selects the Lucene Snowball stemming tokenizer for Turkish.
TURKISH = "turkish"
class SplitSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input text by SplitSkill.
"""
#: Danish.
DA = "da"
#: German.
DE = "de"
#: English.
EN = "en"
#: Spanish.
ES = "es"
#: Finnish.
FI = "fi"
#: French.
FR = "fr"
#: Italian.
IT = "it"
#: Korean.
KO = "ko"
#: Portuguese.
PT = "pt"
class StemmerTokenFilterLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language to use for a stemmer token filter.
"""
#: Selects the Lucene stemming tokenizer for Arabic.
ARABIC = "arabic"
#: Selects the Lucene stemming tokenizer for Armenian.
ARMENIAN = "armenian"
#: Selects the Lucene stemming tokenizer for Basque.
BASQUE = "basque"
#: Selects the Lucene stemming tokenizer for Portuguese (Brazil).
BRAZILIAN = "brazilian"
#: Selects the Lucene stemming tokenizer for Bulgarian.
BULGARIAN = "bulgarian"
#: Selects the Lucene stemming tokenizer for Catalan.
CATALAN = "catalan"
#: Selects the Lucene stemming tokenizer for Czech.
CZECH = "czech"
#: Selects the Lucene stemming tokenizer for Danish.
DANISH = "danish"
#: Selects the Lucene stemming tokenizer for Dutch.
DUTCH = "dutch"
#: Selects the Lucene stemming tokenizer for Dutch that uses the Kraaij-Pohlmann stemming
#: algorithm.
DUTCH_KP = "dutchKp"
#: Selects the Lucene stemming tokenizer for English.
ENGLISH = "english"
#: Selects the Lucene stemming tokenizer for English that does light stemming.
LIGHT_ENGLISH = "lightEnglish"
#: Selects the Lucene stemming tokenizer for English that does minimal stemming.
MINIMAL_ENGLISH = "minimalEnglish"
#: Selects the Lucene stemming tokenizer for English that removes trailing possessives from words.
POSSESSIVE_ENGLISH = "possessiveEnglish"
#: Selects the Lucene stemming tokenizer for English that uses the Porter2 stemming algorithm.
PORTER2 = "porter2"
#: Selects the Lucene stemming tokenizer for English that uses the Lovins stemming algorithm.
LOVINS = "lovins"
#: Selects the Lucene stemming tokenizer for Finnish.
FINNISH = "finnish"
#: Selects the Lucene stemming tokenizer for Finnish that does light stemming.
LIGHT_FINNISH = "lightFinnish"
#: Selects the Lucene stemming tokenizer for French.
FRENCH = "french"
#: Selects the Lucene stemming tokenizer for French that does light stemming.
LIGHT_FRENCH = "lightFrench"
#: Selects the Lucene stemming tokenizer for French that does minimal stemming.
MINIMAL_FRENCH = "minimalFrench"
#: Selects the Lucene stemming tokenizer for Galician.
GALICIAN = "galician"
#: Selects the Lucene stemming tokenizer for Galician that does minimal stemming.
MINIMAL_GALICIAN = "minimalGalician"
#: Selects the Lucene stemming tokenizer for German.
GERMAN = "german"
#: Selects the Lucene stemming tokenizer that uses the German variant algorithm.
GERMAN2 = "german2"
#: Selects the Lucene stemming tokenizer for German that does light stemming.
LIGHT_GERMAN = "lightGerman"
#: Selects the Lucene stemming tokenizer for German that does minimal stemming.
MINIMAL_GERMAN = "minimalGerman"
#: Selects the Lucene stemming tokenizer for Greek.
GREEK = "greek"
#: Selects the Lucene stemming tokenizer for Hindi.
HINDI = "hindi"
#: Selects the Lucene stemming tokenizer for Hungarian.
HUNGARIAN = "hungarian"
#: Selects the Lucene stemming tokenizer for Hungarian that does light stemming.
LIGHT_HUNGARIAN = "lightHungarian"
#: Selects the Lucene stemming tokenizer for Indonesian.
INDONESIAN = "indonesian"
#: Selects the Lucene stemming tokenizer for Irish.
IRISH = "irish"
#: Selects the Lucene stemming tokenizer for Italian.
ITALIAN = "italian"
#: Selects the Lucene stemming tokenizer for Italian that does light stemming.
LIGHT_ITALIAN = "lightItalian"
#: Selects the Lucene stemming tokenizer for Sorani.
SORANI = "sorani"
#: Selects the Lucene stemming tokenizer for Latvian.
LATVIAN = "latvian"
#: Selects the Lucene stemming tokenizer for Norwegian (Bokmål).
NORWEGIAN = "norwegian"
#: Selects the Lucene stemming tokenizer for Norwegian (Bokmål) that does light stemming.
LIGHT_NORWEGIAN = "lightNorwegian"
#: Selects the Lucene stemming tokenizer for Norwegian (Bokmål) that does minimal stemming.
MINIMAL_NORWEGIAN = "minimalNorwegian"
#: Selects the Lucene stemming tokenizer for Norwegian (Nynorsk) that does light stemming.
LIGHT_NYNORSK = "lightNynorsk"
#: Selects the Lucene stemming tokenizer for Norwegian (Nynorsk) that does minimal stemming.
MINIMAL_NYNORSK = "minimalNynorsk"
#: Selects the Lucene stemming tokenizer for Portuguese.
PORTUGUESE = "portuguese"
#: Selects the Lucene stemming tokenizer for Portuguese that does light stemming.
LIGHT_PORTUGUESE = "lightPortuguese"
#: Selects the Lucene stemming tokenizer for Portuguese that does minimal stemming.
MINIMAL_PORTUGUESE = "minimalPortuguese"
#: Selects the Lucene stemming tokenizer for Portuguese that uses the RSLP stemming algorithm.
PORTUGUESE_RSLP = "portugueseRslp"
#: Selects the Lucene stemming tokenizer for Romanian.
ROMANIAN = "romanian"
#: Selects the Lucene stemming tokenizer for Russian.
RUSSIAN = "russian"
#: Selects the Lucene stemming tokenizer for Russian that does light stemming.
LIGHT_RUSSIAN = "lightRussian"
#: Selects the Lucene stemming tokenizer for Spanish.
SPANISH = "spanish"
#: Selects the Lucene stemming tokenizer for Spanish that does light stemming.
LIGHT_SPANISH = "lightSpanish"
#: Selects the Lucene stemming tokenizer for Swedish.
SWEDISH = "swedish"
#: Selects the Lucene stemming tokenizer for Swedish that does light stemming.
LIGHT_SWEDISH = "lightSwedish"
#: Selects the Lucene stemming tokenizer for Turkish.
TURKISH = "turkish"
class StopwordsList(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Identifies a predefined list of language-specific stopwords.
"""
#: Selects the stopword list for Arabic.
ARABIC = "arabic"
#: Selects the stopword list for Armenian.
ARMENIAN = "armenian"
#: Selects the stopword list for Basque.
BASQUE = "basque"
#: Selects the stopword list for Portuguese (Brazil).
BRAZILIAN = "brazilian"
#: Selects the stopword list for Bulgarian.
BULGARIAN = "bulgarian"
#: Selects the stopword list for Catalan.
CATALAN = "catalan"
#: Selects the stopword list for Czech.
CZECH = "czech"
#: Selects the stopword list for Danish.
DANISH = "danish"
#: Selects the stopword list for Dutch.
DUTCH = "dutch"
#: Selects the stopword list for English.
ENGLISH = "english"
#: Selects the stopword list for Finnish.
FINNISH = "finnish"
#: Selects the stopword list for French.
FRENCH = "french"
#: Selects the stopword list for Galician.
GALICIAN = "galician"
#: Selects the stopword list for German.
GERMAN = "german"
#: Selects the stopword list for Greek.
GREEK = "greek"
#: Selects the stopword list for Hindi.
HINDI = "hindi"
#: Selects the stopword list for Hungarian.
HUNGARIAN = "hungarian"
#: Selects the stopword list for Indonesian.
INDONESIAN = "indonesian"
#: Selects the stopword list for Irish.
IRISH = "irish"
#: Selects the stopword list for Italian.
ITALIAN = "italian"
#: Selects the stopword list for Latvian.
LATVIAN = "latvian"
#: Selects the stopword list for Norwegian.
NORWEGIAN = "norwegian"
#: Selects the stopword list for Persian.
PERSIAN = "persian"
#: Selects the stopword list for Portuguese.
PORTUGUESE = "portuguese"
#: Selects the stopword list for Romanian.
ROMANIAN = "romanian"
#: Selects the stopword list for Russian.
RUSSIAN = "russian"
#: Selects the stopword list for Sorani.
SORANI = "sorani"
#: Selects the stopword list for Spanish.
SPANISH = "spanish"
#: Selects the stopword list for Swedish.
SWEDISH = "swedish"
#: Selects the stopword list for Thai.
THAI = "thai"
#: Selects the stopword list for Turkish.
TURKISH = "turkish"
class TextSplitMode(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""A value indicating which split mode to perform.
"""
#: Split the text into individual pages.
PAGES = "pages"
#: Split the text into individual sentences.
SENTENCES = "sentences"
class TextTranslationSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The language codes supported for input text by TextTranslationSkill.
"""
#: Afrikaans.
AF = "af"
#: Arabic.
AR = "ar"
#: Bangla.
BN = "bn"
#: Bosnian (Latin).
BS = "bs"
#: Bulgarian.
BG = "bg"
#: Cantonese (Traditional).
YUE = "yue"
#: Catalan.
CA = "ca"
#: Chinese Simplified.
ZH_HANS = "zh-Hans"
#: Chinese Traditional.
ZH_HANT = "zh-Hant"
#: Croatian.
HR = "hr"
#: Czech.
CS = "cs"
#: Danish.
DA = "da"
#: Dutch.
NL = "nl"
#: English.
EN = "en"
#: Estonian.
ET = "et"
#: Fijian.
FJ = "fj"
#: Filipino.
FIL = "fil"
#: Finnish.
FI = "fi"
#: French.
FR = "fr"
#: German.
DE = "de"
#: Greek.
EL = "el"
#: Haitian Creole.
HT = "ht"
#: Hebrew.
HE = "he"
#: Hindi.
HI = "hi"
#: Hmong Daw.
MWW = "mww"
#: Hungarian.
HU = "hu"
#: Icelandic.
IS_ENUM = "is"
#: Indonesian.
ID = "id"
#: Italian.
IT = "it"
#: Japanese.
JA = "ja"
#: Kiswahili.
SW = "sw"
#: Klingon.
TLH = "tlh"
#: Klingon (Latin script).
TLH_LATN = "tlh-Latn"
#: Klingon (Klingon script).
TLH_PIQD = "tlh-Piqd"
#: Korean.
KO = "ko"
#: Latvian.
LV = "lv"
#: Lithuanian.
LT = "lt"
#: Malagasy.
MG = "mg"
#: Malay.
MS = "ms"
#: Maltese.
MT = "mt"
#: Norwegian.
NB = "nb"
#: Persian.
FA = "fa"
#: Polish.
PL = "pl"
#: Portuguese.
PT = "pt"
#: Portuguese (Brazil).
PT_BR = "pt-br"
#: Portuguese (Portugal).
PT_PT = "pt-PT"
#: Queretaro Otomi.
OTQ = "otq"
#: Romanian.
RO = "ro"
#: Russian.
RU = "ru"
#: Samoan.
SM = "sm"
#: Serbian (Cyrillic).
SR_CYRL = "sr-Cyrl"
#: Serbian (Latin).
SR_LATN = "sr-Latn"
#: Slovak.
SK = "sk"
#: Slovenian.
SL = "sl"
#: Spanish.
ES = "es"
#: Swedish.
SV = "sv"
#: Tahitian.
TY = "ty"
#: Tamil.
TA = "ta"
#: Telugu.
TE = "te"
#: Thai.
TH = "th"
#: Tongan.
TO = "to"
#: Turkish.
TR = "tr"
#: Ukrainian.
UK = "uk"
#: Urdu.
UR = "ur"
#: Vietnamese.
VI = "vi"
#: Welsh.
CY = "cy"
#: Yucatec Maya.
YUA = "yua"
#: Irish.
GA = "ga"
#: Kannada.
KN = "kn"
#: Maori.
MI = "mi"
#: Malayalam.
ML = "ml"
#: Punjabi.
PA = "pa"
class TokenCharacterKind(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Represents classes of characters on which a token filter can operate.
"""
#: Keeps letters in tokens.
LETTER = "letter"
#: Keeps digits in tokens.
DIGIT = "digit"
#: Keeps whitespace in tokens.
WHITESPACE = "whitespace"
#: Keeps punctuation in tokens.
PUNCTUATION = "punctuation"
#: Keeps symbols in tokens.
SYMBOL = "symbol"
class TokenFilterName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""Defines the names of all token filters supported by Azure Cognitive Search.
"""
#: A token filter that applies the Arabic normalizer to normalize the orthography. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.html.
ARABIC_NORMALIZATION = "arabic_normalization"
#: Strips all characters after an apostrophe (including the apostrophe itself). See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFilter.html.
APOSTROPHE = "apostrophe"
#: Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127
#: ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if such
#: equivalents exist. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html.
ASCII_FOLDING = "asciifolding"
#: Forms bigrams of CJK terms that are generated from the standard tokenizer. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKBigramFilter.html.
CJK_BIGRAM = "cjk_bigram"
#: Normalizes CJK width differences. Folds fullwidth ASCII variants into the equivalent basic
#: Latin, and half-width Katakana variants into the equivalent Kana. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKWidthFilter.html.
CJK_WIDTH = "cjk_width"
#: Removes English possessives, and dots from acronyms. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicFilter.html.
CLASSIC = "classic"
#: Constructs bigrams for frequently occurring terms while indexing. Single terms are still indexed
#: too, with bigrams overlaid. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/commongrams/CommonGramsFilter.html.
COMMON_GRAM = "common_grams"
#: Generates n-grams of the given size(s) starting from the front or the back of an input token.
#: See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.html.
EDGE_N_GRAM = "edgeNGram_v2"
#: Removes elisions. For example, "l'avion" (the plane) will be converted to "avion" (plane). See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html.
ELISION = "elision"
#: Normalizes German characters according to the heuristics of the German2 snowball algorithm. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/de/GermanNormalizationFilter.html.
GERMAN_NORMALIZATION = "german_normalization"
#: Normalizes text in Hindi to remove some differences in spelling variations. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/hi/HindiNormalizationFilter.html.
HINDI_NORMALIZATION = "hindi_normalization"
#: Normalizes the Unicode representation of text in Indian languages. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/in/IndicNormalizationFilter.html.
INDIC_NORMALIZATION = "indic_normalization"
#: Emits each incoming token twice, once as keyword and once as non-keyword. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilter.html.
KEYWORD_REPEAT = "keyword_repeat"
#: A high-performance kstem filter for English. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/en/KStemFilter.html.
K_STEM = "kstem"
#: Removes words that are too long or too short. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/LengthFilter.html.
LENGTH = "length"
#: Limits the number of tokens while indexing. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.html.
LIMIT = "limit"
#: Normalizes token text to lower case. See
#: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html.
LOWERCASE = "lowercase"
#: Generates n-grams of the given size(s). See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenFilter.html.
N_GRAM = "nGram_v2"
#: Applies normalization for Persian. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizationFilter.html.
PERSIAN_NORMALIZATION = "persian_normalization"
#: Create tokens for phonetic matches. See
#: https://lucene.apache.org/core/4_10_3/analyzers-phonetic/org/apache/lucene/analysis/phonetic/package-tree.html.
PHONETIC = "phonetic"
#: Uses the Porter stemming algorithm to transform the token stream. See
#: http://tartarus.org/~martin/PorterStemmer.
PORTER_STEM = "porter_stem"
#: Reverses the token string. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseStringFilter.html.
REVERSE = "reverse"
#: Normalizes use of the interchangeable Scandinavian characters. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianNormalizationFilter.html.
SCANDINAVIAN_NORMALIZATION = "scandinavian_normalization"
#: Folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o. It also discriminates against use of
#: double vowels aa, ae, ao, oe and oo, leaving just the first one. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianFoldingFilter.html.
SCANDINAVIAN_FOLDING_NORMALIZATION = "scandinavian_folding"
#: Creates combinations of tokens as a single token. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/shingle/ShingleFilter.html.
SHINGLE = "shingle"
#: A filter that stems words using a Snowball-generated stemmer. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/snowball/SnowballFilter.html.
SNOWBALL = "snowball"
#: Normalizes the Unicode representation of Sorani text. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormalizationFilter.html.
SORANI_NORMALIZATION = "sorani_normalization"
#: Language specific stemming filter. See
#: https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#TokenFilters.
STEMMER = "stemmer"
#: Removes stop words from a token stream. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopFilter.html.
STOPWORDS = "stopwords"
#: Trims leading and trailing whitespace from tokens. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html.
TRIM = "trim"
#: Truncates the terms to a specific length. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.html.
TRUNCATE = "truncate"
#: Filters out tokens with same text as the previous token. See
#: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.html.
UNIQUE = "unique"
#: Normalizes token text to upper case. See
#: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html.
UPPERCASE = "uppercase"
#: Splits words into subwords and performs optional transformations on subword groups.
WORD_DELIMITER = "word_delimiter"
class VisualFeature(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
"""The strings indicating what visual feature types to return.
"""
#: Visual features recognized as adult persons.
ADULT = "adult"
#: Visual features recognized as commercial brands.
BRANDS = "brands"
#: Categories.
CATEGORIES = "categories"
#: Description.
DESCRIPTION = "description"
#: Visual features recognized as people faces.
FACES = "faces"
#: Visual features recognized as objects.
OBJECTS = "objects"
#: Tags.
TAGS = "tags"