Source code for azure.search.documents.indexes._generated.models._search_client_enums

# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# Code generated by Microsoft (R) AutoRest Code Generator.
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
# --------------------------------------------------------------------------

from enum import Enum
from six import with_metaclass
from azure.core import CaseInsensitiveEnumMeta
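# Usage note (illustrative, not part of the generated code): each enum below mixes in
# ``str`` and uses ``CaseInsensitiveEnumMeta`` from azure-core, so members compare equal
# to their raw service strings and lookup by member name is case-insensitive. A minimal
# sketch, assuming this module has been imported:
#
#     BlobIndexerParsingMode.JSON == "json"                            # True (str mixin)
#     BlobIndexerParsingMode["json"] is BlobIndexerParsingMode.JSON    # case-insensitive lookup
#
# Either the enum member or the plain string may therefore be passed wherever the REST
# API expects the string value.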


class BlobIndexerDataToExtract(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Specifies the data to extract from Azure blob storage and tells the indexer which data to
    extract from image content when "imageAction" is set to a value other than "none".  This
    applies to embedded image content in a .PDF or other application, or image files such as .jpg
    and .png, in Azure blobs.
    """

    #: Indexes just the standard blob properties and user-specified metadata.
    STORAGE_METADATA = "storageMetadata"
    #: Extracts metadata provided by the Azure blob storage subsystem and the content-type specific
    #: metadata (for example, metadata unique to just .png files are indexed).
    ALL_METADATA = "allMetadata"
    #: Extracts all metadata and textual content from each blob.
    CONTENT_AND_METADATA = "contentAndMetadata"

class BlobIndexerImageAction(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Determines how to process embedded images and image files in Azure blob storage.  Setting the
    "imageAction" configuration to any value other than "none" requires that a skillset also be
    attached to that indexer.
    """

    #: Ignores embedded images or image files in the data set.  This is the default.
    NONE = "none"
    #: Extracts text from images (for example, the word "STOP" from a traffic stop sign), and embeds
    #: it into the content field.  This action requires that "dataToExtract" is set to
    #: "contentAndMetadata".  A normalized image refers to additional processing resulting in uniform
    #: image output, sized and rotated to promote consistent rendering when you include images in
    #: visual search results. This information is generated for each image when you use this option.
    GENERATE_NORMALIZED_IMAGES = "generateNormalizedImages"
    #: Extracts text from images (for example, the word "STOP" from a traffic stop sign), and embeds
    #: it into the content field, but treats PDF files differently in that each page will be rendered
    #: as an image and normalized accordingly, instead of extracting embedded images.  Non-PDF file
    #: types will be treated the same as if "generateNormalizedImages" was set.
    GENERATE_NORMALIZED_IMAGE_PER_PAGE = "generateNormalizedImagePerPage"

class BlobIndexerParsingMode(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the parsing mode for indexing from an Azure blob data source.
    """

    #: Set to default for normal file processing.
    DEFAULT = "default"
    #: Set to text to improve indexing performance on plain text files in blob storage.
    TEXT = "text"
    #: Set to delimitedText when blobs are plain CSV files.
    DELIMITED_TEXT = "delimitedText"
    #: Set to json to extract structured content from JSON files.
    JSON = "json"
    #: Set to jsonArray to extract individual elements of a JSON array as separate documents in Azure
    #: Cognitive Search.
    JSON_ARRAY = "jsonArray"
    #: Set to jsonLines to extract individual JSON entities, separated by a new line, as separate
    #: documents in Azure Cognitive Search.
    JSON_LINES = "jsonLines"
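# Illustrative sketch (assumes the public ``IndexingParametersConfiguration`` model from
# azure.search.documents.indexes.models; not part of the generated code): the blob indexer
# enums above are typically supplied together as indexer configuration, e.g.:
#
#     configuration = IndexingParametersConfiguration(
#         parsing_mode=BlobIndexerParsingMode.JSON_ARRAY,
#         data_to_extract=BlobIndexerDataToExtract.CONTENT_AND_METADATA,
#         image_action=BlobIndexerImageAction.GENERATE_NORMALIZED_IMAGES,
#     )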

class BlobIndexerPDFTextRotationAlgorithm(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Determines algorithm for text extraction from PDF files in Azure blob storage.
    """

    #: Leverages normal text extraction.  This is the default.
    NONE = "none"
    #: May produce better and more readable text extraction from PDF files that have rotated text
    #: within them.  Note that there may be a small performance impact when this parameter is
    #: used.  This parameter only applies to PDF files, and only to PDFs with embedded text.  If the
    #: rotated text appears within an embedded image in the PDF, this parameter does not apply.
    DETECT_ANGLES = "detectAngles"

class CharFilterName(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all character filters supported by Azure Cognitive Search.
    """

    #: A character filter that attempts to strip out HTML constructs. See
    #: https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.html.
    HTML_STRIP = "html_strip"

class CjkBigramTokenFilterScripts(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Scripts that can be ignored by CjkBigramTokenFilter.
    """

    #: Ignore Han script when forming bigrams of CJK terms.
    HAN = "han"
    #: Ignore Hiragana script when forming bigrams of CJK terms.
    HIRAGANA = "hiragana"
    #: Ignore Katakana script when forming bigrams of CJK terms.
    KATAKANA = "katakana"
    #: Ignore Hangul script when forming bigrams of CJK terms.
    HANGUL = "hangul"

class CustomEntityLookupSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by CustomEntityLookupSkill.
    """

    #: Danish.
    DA = "da"
    #: German.
    DE = "de"
    #: English.
    EN = "en"
    #: Spanish.
    ES = "es"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: Italian.
    IT = "it"
    #: Korean.
    KO = "ko"
    #: Portuguese.
    PT = "pt"

class EdgeNGramTokenFilterSide(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Specifies which side of the input an n-gram should be generated from.
    """

    #: Specifies that the n-gram should be generated from the front of the input.
    FRONT = "front"
    #: Specifies that the n-gram should be generated from the back of the input.
    BACK = "back"

class EntityCategory(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """A string indicating what entity categories to return.
    """

    #: Entities describing a physical location.
    LOCATION = "location"
    #: Entities describing an organization.
    ORGANIZATION = "organization"
    #: Entities describing a person.
    PERSON = "person"
    #: Entities describing a quantity.
    QUANTITY = "quantity"
    #: Entities describing a date and time.
    DATETIME = "datetime"
    #: Entities describing a URL.
    URL = "url"
    #: Entities describing an email address.
    EMAIL = "email"

class EntityRecognitionSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by EntityRecognitionSkill.
    """

    #: Arabic.
    AR = "ar"
    #: Czech.
    CS = "cs"
    #: Chinese-Simplified.
    ZH_HANS = "zh-Hans"
    #: Chinese-Traditional.
    ZH_HANT = "zh-Hant"
    #: Danish.
    DA = "da"
    #: Dutch.
    NL = "nl"
    #: English.
    EN = "en"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: German.
    DE = "de"
    #: Greek.
    EL = "el"
    #: Hungarian.
    HU = "hu"
    #: Italian.
    IT = "it"
    #: Japanese.
    JA = "ja"
    #: Korean.
    KO = "ko"
    #: Norwegian (Bokmaal).
    NO = "no"
    #: Polish.
    PL = "pl"
    #: Portuguese (Portugal).
    PT_PT = "pt-PT"
    #: Portuguese (Brazil).
    PT_BR = "pt-BR"
    #: Russian.
    RU = "ru"
    #: Spanish.
    ES = "es"
    #: Swedish.
    SV = "sv"
    #: Turkish.
    TR = "tr"

class ImageAnalysisSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input by ImageAnalysisSkill.
    """

    #: English.
    EN = "en"
    #: Spanish.
    ES = "es"
    #: Japanese.
    JA = "ja"
    #: Portuguese.
    PT = "pt"
    #: Chinese.
    ZH = "zh"

class ImageDetail(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """A string indicating which domain-specific details to return.
    """

    #: Details recognized as celebrities.
    CELEBRITIES = "celebrities"
    #: Details recognized as landmarks.
    LANDMARKS = "landmarks"

class IndexerExecutionEnvironment(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Specifies the environment in which the indexer should execute.
    """

    #: Indicates that Azure Cognitive Search can determine where the indexer should execute. This is
    #: the default environment when nothing is specified and is the recommended value.
    STANDARD = "standard"
    #: Indicates that the indexer should run with the environment provisioned specifically for the
    #: search service. This should only be specified as the execution environment if the indexer needs
    #: to access resources securely over shared private link resources.
    PRIVATE = "private"

class IndexerExecutionStatus(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the status of an individual indexer execution.
    """

    #: An indexer invocation has failed, but the failure may be transient. Indexer invocations will
    #: continue per schedule.
    TRANSIENT_FAILURE = "transientFailure"
    #: Indexer execution completed successfully.
    SUCCESS = "success"
    #: Indexer execution is in progress.
    IN_PROGRESS = "inProgress"
    #: Indexer has been reset.
    RESET = "reset"

class IndexerExecutionStatusDetail(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Details the status of an individual indexer execution.
    """

    #: Indicates that the reset that occurred was for a call to ResetDocs.
    RESET_DOCS = "resetDocs"

class IndexerStatus(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the overall indexer status.
    """

    #: Indicates that the indexer is in an unknown state.
    UNKNOWN = "unknown"
    #: Indicates that the indexer experienced an error that cannot be corrected without human
    #: intervention.
    ERROR = "error"
    #: Indicates that the indexer is running normally.
    RUNNING = "running"

class IndexingMode(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the mode the indexer is executing in.
    """

    #: The indexer is indexing all documents in the datasource.
    INDEXING_ALL_DOCS = "indexingAllDocs"
    #: The indexer is indexing selective, reset documents in the datasource. The documents being
    #: indexed are defined on indexer status.
    INDEXING_RESET_DOCS = "indexingResetDocs"

class KeyPhraseExtractionSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by KeyPhraseExtractionSkill.
    """

    #: Danish.
    DA = "da"
    #: Dutch.
    NL = "nl"
    #: English.
    EN = "en"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: German.
    DE = "de"
    #: Italian.
    IT = "it"
    #: Japanese.
    JA = "ja"
    #: Korean.
    KO = "ko"
    #: Norwegian (Bokmaal).
    NO = "no"
    #: Polish.
    PL = "pl"
    #: Portuguese (Portugal).
    PT_PT = "pt-PT"
    #: Portuguese (Brazil).
    PT_BR = "pt-BR"
    #: Russian.
    RU = "ru"
    #: Spanish.
    ES = "es"
    #: Swedish.
    SV = "sv"

class LexicalAnalyzerName(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all text analyzers supported by Azure Cognitive Search.
    """

    #: Microsoft analyzer for Arabic.
    AR_MICROSOFT = "ar.microsoft"
    #: Lucene analyzer for Arabic.
    AR_LUCENE = "ar.lucene"
    #: Lucene analyzer for Armenian.
    HY_LUCENE = "hy.lucene"
    #: Microsoft analyzer for Bangla.
    BN_MICROSOFT = "bn.microsoft"
    #: Lucene analyzer for Basque.
    EU_LUCENE = "eu.lucene"
    #: Microsoft analyzer for Bulgarian.
    BG_MICROSOFT = "bg.microsoft"
    #: Lucene analyzer for Bulgarian.
    BG_LUCENE = "bg.lucene"
    #: Microsoft analyzer for Catalan.
    CA_MICROSOFT = "ca.microsoft"
    #: Lucene analyzer for Catalan.
    CA_LUCENE = "ca.lucene"
    #: Microsoft analyzer for Chinese (Simplified).
    ZH_HANS_MICROSOFT = "zh-Hans.microsoft"
    #: Lucene analyzer for Chinese (Simplified).
    ZH_HANS_LUCENE = "zh-Hans.lucene"
    #: Microsoft analyzer for Chinese (Traditional).
    ZH_HANT_MICROSOFT = "zh-Hant.microsoft"
    #: Lucene analyzer for Chinese (Traditional).
    ZH_HANT_LUCENE = "zh-Hant.lucene"
    #: Microsoft analyzer for Croatian.
    HR_MICROSOFT = "hr.microsoft"
    #: Microsoft analyzer for Czech.
    CS_MICROSOFT = "cs.microsoft"
    #: Lucene analyzer for Czech.
    CS_LUCENE = "cs.lucene"
    #: Microsoft analyzer for Danish.
    DA_MICROSOFT = "da.microsoft"
    #: Lucene analyzer for Danish.
    DA_LUCENE = "da.lucene"
    #: Microsoft analyzer for Dutch.
    NL_MICROSOFT = "nl.microsoft"
    #: Lucene analyzer for Dutch.
    NL_LUCENE = "nl.lucene"
    #: Microsoft analyzer for English.
    EN_MICROSOFT = "en.microsoft"
    #: Lucene analyzer for English.
    EN_LUCENE = "en.lucene"
    #: Microsoft analyzer for Estonian.
    ET_MICROSOFT = "et.microsoft"
    #: Microsoft analyzer for Finnish.
    FI_MICROSOFT = "fi.microsoft"
    #: Lucene analyzer for Finnish.
    FI_LUCENE = "fi.lucene"
    #: Microsoft analyzer for French.
    FR_MICROSOFT = "fr.microsoft"
    #: Lucene analyzer for French.
    FR_LUCENE = "fr.lucene"
    #: Lucene analyzer for Galician.
    GL_LUCENE = "gl.lucene"
    #: Microsoft analyzer for German.
    DE_MICROSOFT = "de.microsoft"
    #: Lucene analyzer for German.
    DE_LUCENE = "de.lucene"
    #: Microsoft analyzer for Greek.
    EL_MICROSOFT = "el.microsoft"
    #: Lucene analyzer for Greek.
    EL_LUCENE = "el.lucene"
    #: Microsoft analyzer for Gujarati.
    GU_MICROSOFT = "gu.microsoft"
    #: Microsoft analyzer for Hebrew.
    HE_MICROSOFT = "he.microsoft"
    #: Microsoft analyzer for Hindi.
    HI_MICROSOFT = "hi.microsoft"
    #: Lucene analyzer for Hindi.
    HI_LUCENE = "hi.lucene"
    #: Microsoft analyzer for Hungarian.
    HU_MICROSOFT = "hu.microsoft"
    #: Lucene analyzer for Hungarian.
    HU_LUCENE = "hu.lucene"
    #: Microsoft analyzer for Icelandic.
    IS_MICROSOFT = "is.microsoft"
    #: Microsoft analyzer for Indonesian (Bahasa).
    ID_MICROSOFT = "id.microsoft"
    #: Lucene analyzer for Indonesian.
    ID_LUCENE = "id.lucene"
    #: Lucene analyzer for Irish.
    GA_LUCENE = "ga.lucene"
    #: Microsoft analyzer for Italian.
    IT_MICROSOFT = "it.microsoft"
    #: Lucene analyzer for Italian.
    IT_LUCENE = "it.lucene"
    #: Microsoft analyzer for Japanese.
    JA_MICROSOFT = "ja.microsoft"
    #: Lucene analyzer for Japanese.
    JA_LUCENE = "ja.lucene"
    #: Microsoft analyzer for Kannada.
    KN_MICROSOFT = "kn.microsoft"
    #: Microsoft analyzer for Korean.
    KO_MICROSOFT = "ko.microsoft"
    #: Lucene analyzer for Korean.
    KO_LUCENE = "ko.lucene"
    #: Microsoft analyzer for Latvian.
    LV_MICROSOFT = "lv.microsoft"
    #: Lucene analyzer for Latvian.
    LV_LUCENE = "lv.lucene"
    #: Microsoft analyzer for Lithuanian.
    LT_MICROSOFT = "lt.microsoft"
    #: Microsoft analyzer for Malayalam.
    ML_MICROSOFT = "ml.microsoft"
    #: Microsoft analyzer for Malay (Latin).
    MS_MICROSOFT = "ms.microsoft"
    #: Microsoft analyzer for Marathi.
    MR_MICROSOFT = "mr.microsoft"
    #: Microsoft analyzer for Norwegian (Bokmål).
    NB_MICROSOFT = "nb.microsoft"
    #: Lucene analyzer for Norwegian.
    NO_LUCENE = "no.lucene"
    #: Lucene analyzer for Persian.
    FA_LUCENE = "fa.lucene"
    #: Microsoft analyzer for Polish.
    PL_MICROSOFT = "pl.microsoft"
    #: Lucene analyzer for Polish.
    PL_LUCENE = "pl.lucene"
    #: Microsoft analyzer for Portuguese (Brazil).
    PT_BR_MICROSOFT = "pt-BR.microsoft"
    #: Lucene analyzer for Portuguese (Brazil).
    PT_BR_LUCENE = "pt-BR.lucene"
    #: Microsoft analyzer for Portuguese (Portugal).
    PT_PT_MICROSOFT = "pt-PT.microsoft"
    #: Lucene analyzer for Portuguese (Portugal).
    PT_PT_LUCENE = "pt-PT.lucene"
    #: Microsoft analyzer for Punjabi.
    PA_MICROSOFT = "pa.microsoft"
    #: Microsoft analyzer for Romanian.
    RO_MICROSOFT = "ro.microsoft"
    #: Lucene analyzer for Romanian.
    RO_LUCENE = "ro.lucene"
    #: Microsoft analyzer for Russian.
    RU_MICROSOFT = "ru.microsoft"
    #: Lucene analyzer for Russian.
    RU_LUCENE = "ru.lucene"
    #: Microsoft analyzer for Serbian (Cyrillic).
    SR_CYRILLIC_MICROSOFT = "sr-cyrillic.microsoft"
    #: Microsoft analyzer for Serbian (Latin).
    SR_LATIN_MICROSOFT = "sr-latin.microsoft"
    #: Microsoft analyzer for Slovak.
    SK_MICROSOFT = "sk.microsoft"
    #: Microsoft analyzer for Slovenian.
    SL_MICROSOFT = "sl.microsoft"
    #: Microsoft analyzer for Spanish.
    ES_MICROSOFT = "es.microsoft"
    #: Lucene analyzer for Spanish.
    ES_LUCENE = "es.lucene"
    #: Microsoft analyzer for Swedish.
    SV_MICROSOFT = "sv.microsoft"
    #: Lucene analyzer for Swedish.
    SV_LUCENE = "sv.lucene"
    #: Microsoft analyzer for Tamil.
    TA_MICROSOFT = "ta.microsoft"
    #: Microsoft analyzer for Telugu.
    TE_MICROSOFT = "te.microsoft"
    #: Microsoft analyzer for Thai.
    TH_MICROSOFT = "th.microsoft"
    #: Lucene analyzer for Thai.
    TH_LUCENE = "th.lucene"
    #: Microsoft analyzer for Turkish.
    TR_MICROSOFT = "tr.microsoft"
    #: Lucene analyzer for Turkish.
    TR_LUCENE = "tr.lucene"
    #: Microsoft analyzer for Ukrainian.
    UK_MICROSOFT = "uk.microsoft"
    #: Microsoft analyzer for Urdu.
    UR_MICROSOFT = "ur.microsoft"
    #: Microsoft analyzer for Vietnamese.
    VI_MICROSOFT = "vi.microsoft"
    #: Standard Lucene analyzer.
    STANDARD_LUCENE = "standard.lucene"
    #: Standard ASCII Folding Lucene analyzer. See
    #: https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#Analyzers.
    STANDARD_ASCII_FOLDING_LUCENE = "standardasciifolding.lucene"
    #: Treats the entire content of a field as a single token. This is useful for data like zip codes,
    #: ids, and some product names. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordAnalyzer.html.
    KEYWORD = "keyword"
    #: Flexibly separates text into terms via a regular expression pattern. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.html.
    PATTERN = "pattern"
    #: Divides text at non-letters and converts them to lower case. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/SimpleAnalyzer.html.
    SIMPLE = "simple"
    #: Divides text at non-letters; Applies the lowercase and stopword token filters. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopAnalyzer.html.
    STOP = "stop"
    #: An analyzer that uses the whitespace tokenizer. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceAnalyzer.html.
    WHITESPACE = "whitespace"

class LexicalNormalizerName(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all text normalizers supported by Azure Cognitive Search.
    """

    #: Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127
    #: ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if such
    #: equivalents exist. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html.
    ASCII_FOLDING = "asciifolding"
    #: Removes elisions. For example, "l'avion" (the plane) will be converted to "avion" (plane). See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html.
    ELISION = "elision"
    #: Normalizes token text to lowercase. See
    #: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html.
    LOWERCASE = "lowercase"
    #: Standard normalizer, which consists of lowercase and asciifolding. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseStringFilter.html.
    STANDARD = "standard"
    #: Normalizes token text to uppercase. See
    #: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html.
    UPPERCASE = "uppercase"

class LexicalTokenizerName(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all tokenizers supported by Azure Cognitive Search.
    """

    #: Grammar-based tokenizer that is suitable for processing most European-language documents. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicTokenizer.html.
    CLASSIC = "classic"
    #: Tokenizes the input from an edge into n-grams of the given size(s). See
    #: https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.html.
    EDGE_N_GRAM = "edgeNGram"
    #: Emits the entire input as a single token. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordTokenizer.html.
    KEYWORD = "keyword_v2"
    #: Divides text at non-letters. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LetterTokenizer.html.
    LETTER = "letter"
    #: Divides text at non-letters and converts them to lower case. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseTokenizer.html.
    LOWERCASE = "lowercase"
    #: Divides text using language-specific rules.
    MICROSOFT_LANGUAGE_TOKENIZER = "microsoft_language_tokenizer"
    #: Divides text using language-specific rules and reduces words to their base forms.
    MICROSOFT_LANGUAGE_STEMMING_TOKENIZER = "microsoft_language_stemming_tokenizer"
    #: Tokenizes the input into n-grams of the given size(s). See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenizer.html.
    N_GRAM = "nGram"
    #: Tokenizer for path-like hierarchies. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.html.
    PATH_HIERARCHY = "path_hierarchy_v2"
    #: Tokenizer that uses regex pattern matching to construct distinct tokens. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternTokenizer.html.
    PATTERN = "pattern"
    #: Standard Lucene analyzer; Composed of the standard tokenizer, lowercase filter and stop filter.
    #: See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/StandardTokenizer.html.
    STANDARD = "standard_v2"
    #: Tokenizes urls and emails as one token. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.html.
    UAX_URL_EMAIL = "uax_url_email"
    #: Divides text at whitespace. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceTokenizer.html.
    WHITESPACE = "whitespace"

class LineEnding(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the sequence of characters to use between the lines of text recognized by the OCR
    skill. The default value is "space".
    """

    #: Lines are separated by a single space character.
    SPACE = "space"
    #: Lines are separated by a carriage return ('\r') character.
    CARRIAGE_RETURN = "carriageReturn"
    #: Lines are separated by a single line feed ('\n') character.
    LINE_FEED = "lineFeed"
    #: Lines are separated by a carriage return and a line feed ('\r\n') character.
    CARRIAGE_RETURN_LINE_FEED = "carriageReturnLineFeed"

class MicrosoftStemmingTokenizerLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Lists the languages supported by the Microsoft language stemming tokenizer.
    """

    #: Selects the Microsoft stemming tokenizer for Arabic.
    ARABIC = "arabic"
    #: Selects the Microsoft stemming tokenizer for Bangla.
    BANGLA = "bangla"
    #: Selects the Microsoft stemming tokenizer for Bulgarian.
    BULGARIAN = "bulgarian"
    #: Selects the Microsoft stemming tokenizer for Catalan.
    CATALAN = "catalan"
    #: Selects the Microsoft stemming tokenizer for Croatian.
    CROATIAN = "croatian"
    #: Selects the Microsoft stemming tokenizer for Czech.
    CZECH = "czech"
    #: Selects the Microsoft stemming tokenizer for Danish.
    DANISH = "danish"
    #: Selects the Microsoft stemming tokenizer for Dutch.
    DUTCH = "dutch"
    #: Selects the Microsoft stemming tokenizer for English.
    ENGLISH = "english"
    #: Selects the Microsoft stemming tokenizer for Estonian.
    ESTONIAN = "estonian"
    #: Selects the Microsoft stemming tokenizer for Finnish.
    FINNISH = "finnish"
    #: Selects the Microsoft stemming tokenizer for French.
    FRENCH = "french"
    #: Selects the Microsoft stemming tokenizer for German.
    GERMAN = "german"
    #: Selects the Microsoft stemming tokenizer for Greek.
    GREEK = "greek"
    #: Selects the Microsoft stemming tokenizer for Gujarati.
    GUJARATI = "gujarati"
    #: Selects the Microsoft stemming tokenizer for Hebrew.
    HEBREW = "hebrew"
    #: Selects the Microsoft stemming tokenizer for Hindi.
    HINDI = "hindi"
    #: Selects the Microsoft stemming tokenizer for Hungarian.
    HUNGARIAN = "hungarian"
    #: Selects the Microsoft stemming tokenizer for Icelandic.
    ICELANDIC = "icelandic"
    #: Selects the Microsoft stemming tokenizer for Indonesian.
    INDONESIAN = "indonesian"
    #: Selects the Microsoft stemming tokenizer for Italian.
    ITALIAN = "italian"
    #: Selects the Microsoft stemming tokenizer for Kannada.
    KANNADA = "kannada"
    #: Selects the Microsoft stemming tokenizer for Latvian.
    LATVIAN = "latvian"
    #: Selects the Microsoft stemming tokenizer for Lithuanian.
    LITHUANIAN = "lithuanian"
    #: Selects the Microsoft stemming tokenizer for Malay.
    MALAY = "malay"
    #: Selects the Microsoft stemming tokenizer for Malayalam.
    MALAYALAM = "malayalam"
    #: Selects the Microsoft stemming tokenizer for Marathi.
    MARATHI = "marathi"
    #: Selects the Microsoft stemming tokenizer for Norwegian (Bokmål).
    NORWEGIAN_BOKMAAL = "norwegianBokmaal"
    #: Selects the Microsoft stemming tokenizer for Polish.
    POLISH = "polish"
    #: Selects the Microsoft stemming tokenizer for Portuguese.
    PORTUGUESE = "portuguese"
    #: Selects the Microsoft stemming tokenizer for Portuguese (Brazil).
    PORTUGUESE_BRAZILIAN = "portugueseBrazilian"
    #: Selects the Microsoft stemming tokenizer for Punjabi.
    PUNJABI = "punjabi"
    #: Selects the Microsoft stemming tokenizer for Romanian.
    ROMANIAN = "romanian"
    #: Selects the Microsoft stemming tokenizer for Russian.
    RUSSIAN = "russian"
    #: Selects the Microsoft stemming tokenizer for Serbian (Cyrillic).
    SERBIAN_CYRILLIC = "serbianCyrillic"
    #: Selects the Microsoft stemming tokenizer for Serbian (Latin).
    SERBIAN_LATIN = "serbianLatin"
    #: Selects the Microsoft stemming tokenizer for Slovak.
    SLOVAK = "slovak"
    #: Selects the Microsoft stemming tokenizer for Slovenian.
    SLOVENIAN = "slovenian"
    #: Selects the Microsoft stemming tokenizer for Spanish.
    SPANISH = "spanish"
    #: Selects the Microsoft stemming tokenizer for Swedish.
    SWEDISH = "swedish"
    #: Selects the Microsoft stemming tokenizer for Tamil.
    TAMIL = "tamil"
    #: Selects the Microsoft stemming tokenizer for Telugu.
    TELUGU = "telugu"
    #: Selects the Microsoft stemming tokenizer for Turkish.
    TURKISH = "turkish"
    #: Selects the Microsoft stemming tokenizer for Ukrainian.
    UKRAINIAN = "ukrainian"
    #: Selects the Microsoft stemming tokenizer for Urdu.
    URDU = "urdu"

class MicrosoftTokenizerLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Lists the languages supported by the Microsoft language tokenizer.
    """

    #: Selects the Microsoft tokenizer for Bangla.
    BANGLA = "bangla"
    #: Selects the Microsoft tokenizer for Bulgarian.
    BULGARIAN = "bulgarian"
    #: Selects the Microsoft tokenizer for Catalan.
    CATALAN = "catalan"
    #: Selects the Microsoft tokenizer for Chinese (Simplified).
    CHINESE_SIMPLIFIED = "chineseSimplified"
    #: Selects the Microsoft tokenizer for Chinese (Traditional).
    CHINESE_TRADITIONAL = "chineseTraditional"
    #: Selects the Microsoft tokenizer for Croatian.
    CROATIAN = "croatian"
    #: Selects the Microsoft tokenizer for Czech.
    CZECH = "czech"
    #: Selects the Microsoft tokenizer for Danish.
    DANISH = "danish"
    #: Selects the Microsoft tokenizer for Dutch.
    DUTCH = "dutch"
    #: Selects the Microsoft tokenizer for English.
    ENGLISH = "english"
    #: Selects the Microsoft tokenizer for French.
    FRENCH = "french"
    #: Selects the Microsoft tokenizer for German.
    GERMAN = "german"
    #: Selects the Microsoft tokenizer for Greek.
    GREEK = "greek"
    #: Selects the Microsoft tokenizer for Gujarati.
    GUJARATI = "gujarati"
    #: Selects the Microsoft tokenizer for Hindi.
    HINDI = "hindi"
    #: Selects the Microsoft tokenizer for Icelandic.
    ICELANDIC = "icelandic"
    #: Selects the Microsoft tokenizer for Indonesian.
    INDONESIAN = "indonesian"
    #: Selects the Microsoft tokenizer for Italian.
    ITALIAN = "italian"
    #: Selects the Microsoft tokenizer for Japanese.
    JAPANESE = "japanese"
    #: Selects the Microsoft tokenizer for Kannada.
    KANNADA = "kannada"
    #: Selects the Microsoft tokenizer for Korean.
    KOREAN = "korean"
    #: Selects the Microsoft tokenizer for Malay.
    MALAY = "malay"
    #: Selects the Microsoft tokenizer for Malayalam.
    MALAYALAM = "malayalam"
    #: Selects the Microsoft tokenizer for Marathi.
    MARATHI = "marathi"
    #: Selects the Microsoft tokenizer for Norwegian (Bokmål).
    NORWEGIAN_BOKMAAL = "norwegianBokmaal"
    #: Selects the Microsoft tokenizer for Polish.
    POLISH = "polish"
    #: Selects the Microsoft tokenizer for Portuguese.
    PORTUGUESE = "portuguese"
    #: Selects the Microsoft tokenizer for Portuguese (Brazil).
    PORTUGUESE_BRAZILIAN = "portugueseBrazilian"
    #: Selects the Microsoft tokenizer for Punjabi.
    PUNJABI = "punjabi"
    #: Selects the Microsoft tokenizer for Romanian.
    ROMANIAN = "romanian"
    #: Selects the Microsoft tokenizer for Russian.
    RUSSIAN = "russian"
    #: Selects the Microsoft tokenizer for Serbian (Cyrillic).
    SERBIAN_CYRILLIC = "serbianCyrillic"
    #: Selects the Microsoft tokenizer for Serbian (Latin).
    SERBIAN_LATIN = "serbianLatin"
    #: Selects the Microsoft tokenizer for Slovenian.
    SLOVENIAN = "slovenian"
    #: Selects the Microsoft tokenizer for Spanish.
    SPANISH = "spanish"
    #: Selects the Microsoft tokenizer for Swedish.
    SWEDISH = "swedish"
    #: Selects the Microsoft tokenizer for Tamil.
    TAMIL = "tamil"
    #: Selects the Microsoft tokenizer for Telugu.
    TELUGU = "telugu"
    #: Selects the Microsoft tokenizer for Thai.
    THAI = "thai"
    #: Selects the Microsoft tokenizer for Ukrainian.
    UKRAINIAN = "ukrainian"
    #: Selects the Microsoft tokenizer for Urdu.
    URDU = "urdu"
    #: Selects the Microsoft tokenizer for Vietnamese.
    VIETNAMESE = "vietnamese"

class OcrSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input by OcrSkill.
    """

    #: Chinese-Simplified.
    ZH_HANS = "zh-Hans"
    #: Chinese-Traditional.
    ZH_HANT = "zh-Hant"
    #: Czech.
    CS = "cs"
    #: Danish.
    DA = "da"
    #: Dutch.
    NL = "nl"
    #: English.
    EN = "en"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: German.
    DE = "de"
    #: Greek.
    EL = "el"
    #: Hungarian.
    HU = "hu"
    #: Italian.
    IT = "it"
    #: Japanese.
    JA = "ja"
    #: Korean.
    KO = "ko"
    #: Norwegian (Bokmaal).
    NB = "nb"
    #: Polish.
    PL = "pl"
    #: Portuguese.
    PT = "pt"
    #: Russian.
    RU = "ru"
    #: Spanish.
    ES = "es"
    #: Swedish.
    SV = "sv"
    #: Turkish.
    TR = "tr"
    #: Arabic.
    AR = "ar"
    #: Romanian.
    RO = "ro"
    #: Serbian (Cyrillic, Serbia).
    SR_CYRL = "sr-Cyrl"
    #: Serbian (Latin, Serbia).
    SR_LATN = "sr-Latn"
    #: Slovak.
    SK = "sk"
    #: Unknown. If the language is explicitly set to "unk", the language will be auto-detected.
    UNK = "unk"

class PhoneticEncoder(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Identifies the type of phonetic encoder to use with a PhoneticTokenFilter.
    """

    #: Encodes a token into a Metaphone value.
    METAPHONE = "metaphone"
    #: Encodes a token into a double metaphone value.
    DOUBLE_METAPHONE = "doubleMetaphone"
    #: Encodes a token into a Soundex value.
    SOUNDEX = "soundex"
    #: Encodes a token into a Refined Soundex value.
    REFINED_SOUNDEX = "refinedSoundex"
    #: Encodes a token into a Caverphone 1.0 value.
    CAVERPHONE1 = "caverphone1"
    #: Encodes a token into a Caverphone 2.0 value.
    CAVERPHONE2 = "caverphone2"
    #: Encodes a token into a Cologne Phonetic value.
    COLOGNE = "cologne"
    #: Encodes a token into a NYSIIS value.
    NYSIIS = "nysiis"
    #: Encodes a token using the Kölner Phonetik algorithm.
    KOELNER_PHONETIK = "koelnerPhonetik"
    #: Encodes a token using the Haase refinement of the Kölner Phonetik algorithm.
    HAASE_PHONETIK = "haasePhonetik"
    #: Encodes a token into a Beider-Morse value.
    BEIDER_MORSE = "beiderMorse"

class PIIDetectionSkillMaskingMode(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """A string indicating what maskingMode to use to mask the personal information detected in the
    input text.
    """

    #: No masking occurs and the maskedText output will not be returned.
    NONE = "none"
    #: Replaces the detected entities with the character given in the maskingCharacter parameter. The
    #: character will be repeated to the length of the detected entity so that the offsets will
    #: correctly correspond to both the input text as well as the output maskedText.
    REPLACE = "replace"

class RegexFlags(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines flags that can be combined to control how regular expressions are used in the pattern
    analyzer and pattern tokenizer.
    """

    #: Enables canonical equivalence.
    CANON_EQ = "CANON_EQ"
    #: Enables case-insensitive matching.
    CASE_INSENSITIVE = "CASE_INSENSITIVE"
    #: Permits whitespace and comments in the pattern.
    COMMENTS = "COMMENTS"
    #: Enables dotall mode.
    DOT_ALL = "DOTALL"
    #: Enables literal parsing of the pattern.
    LITERAL = "LITERAL"
    #: Enables multiline mode.
    MULTILINE = "MULTILINE"
    #: Enables Unicode-aware case folding.
    UNICODE_CASE = "UNICODE_CASE"
    #: Enables Unix lines mode.
    UNIX_LINES = "UNIX_LINES"

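# Illustrative sketch (an assumption about how the service expects combined flags, not part
# of the generated code): several flags are typically joined into a single "|"-separated
# string when configuring a pattern analyzer or pattern tokenizer. Because the members are
# ``str`` subclasses, standard string joining works directly:
#
#     flags = "|".join([RegexFlags.CASE_INSENSITIVE, RegexFlags.MULTILINE])
#     # -> "CASE_INSENSITIVE|MULTILINE"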
class ScoringFunctionAggregation(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the aggregation function used to combine the results of all the scoring functions in
    a scoring profile.
    """

    #: Boost scores by the sum of all scoring function results.
    SUM = "sum"
    #: Boost scores by the average of all scoring function results.
    AVERAGE = "average"
    #: Boost scores by the minimum of all scoring function results.
    MINIMUM = "minimum"
    #: Boost scores by the maximum of all scoring function results.
    MAXIMUM = "maximum"
    #: Boost scores using the first applicable scoring function in the scoring profile.
    FIRST_MATCHING = "firstMatching"

class ScoringFunctionInterpolation(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the function used to interpolate score boosting across a range of documents.
    """

    #: Boosts scores by a linearly decreasing amount. This is the default interpolation for scoring
    #: functions.
    LINEAR = "linear"
    #: Boosts scores by a constant factor.
    CONSTANT = "constant"
    #: Boosts scores by an amount that decreases quadratically. Boosts decrease slowly for higher
    #: scores, and more quickly as the scores decrease. This interpolation option is not allowed in
    #: tag scoring functions.
    QUADRATIC = "quadratic"
    #: Boosts scores by an amount that decreases logarithmically. Boosts decrease quickly for higher
    #: scores, and more slowly as the scores decrease. This interpolation option is not allowed in tag
    #: scoring functions.
    LOGARITHMIC = "logarithmic"

class SearchFieldDataType(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the data type of a field in a search index.
    """

    #: Indicates that a field contains a string.
    STRING = "Edm.String"
    #: Indicates that a field contains a 32-bit signed integer.
    INT32 = "Edm.Int32"
    #: Indicates that a field contains a 64-bit signed integer.
    INT64 = "Edm.Int64"
    #: Indicates that a field contains an IEEE double-precision floating point number.
    DOUBLE = "Edm.Double"
    #: Indicates that a field contains a Boolean value (true or false).
    BOOLEAN = "Edm.Boolean"
    #: Indicates that a field contains a date/time value, including timezone information.
    DATE_TIME_OFFSET = "Edm.DateTimeOffset"
    #: Indicates that a field contains a geo-location in terms of longitude and latitude.
    GEOGRAPHY_POINT = "Edm.GeographyPoint"
    #: Indicates that a field contains one or more complex objects that in turn have sub-fields of
    #: other types.
    COMPLEX = "Edm.ComplexType"

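# Illustrative sketch (assumes the public ``SearchField`` model from
# azure.search.documents.indexes.models; not part of the generated code): these EDM type
# strings are what a field declaration ultimately carries, e.g.:
#
#     field = SearchField(name="rating", type=SearchFieldDataType.INT32, filterable=True)
#
# Collection fields are expressed by wrapping the element type as a string such as
# "Collection(Edm.String)"; there is no separate enum member for collection types.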
class SearchIndexerDataSourceType(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the type of a datasource.
    """

    #: Indicates an Azure SQL datasource.
    AZURE_SQL = "azuresql"
    #: Indicates a CosmosDB datasource.
    COSMOS_DB = "cosmosdb"
    #: Indicates an Azure Blob datasource.
    AZURE_BLOB = "azureblob"
    #: Indicates an Azure Table datasource.
    AZURE_TABLE = "azuretable"
    #: Indicates a MySql datasource.
    MY_SQL = "mysql"
    #: Indicates an ADLS Gen2 datasource.
    ADLS_GEN2 = "adlsgen2"

class SentimentSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by SentimentSkill.
    """

    #: Danish.
    DA = "da"
    #: Dutch.
    NL = "nl"
    #: English.
    EN = "en"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: German.
    DE = "de"
    #: Greek.
    EL = "el"
    #: Italian.
    IT = "it"
    #: Norwegian (Bokmaal).
    NO = "no"
    #: Polish.
    PL = "pl"
    #: Portuguese (Portugal).
    PT_PT = "pt-PT"
    #: Russian.
    RU = "ru"
    #: Spanish.
    ES = "es"
    #: Swedish.
    SV = "sv"
    #: Turkish.
    TR = "tr"

class SnowballTokenFilterLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language to use for a Snowball token filter.
    """

    #: Selects the Lucene Snowball stemming tokenizer for Armenian.
    ARMENIAN = "armenian"
    #: Selects the Lucene Snowball stemming tokenizer for Basque.
    BASQUE = "basque"
    #: Selects the Lucene Snowball stemming tokenizer for Catalan.
    CATALAN = "catalan"
    #: Selects the Lucene Snowball stemming tokenizer for Danish.
    DANISH = "danish"
    #: Selects the Lucene Snowball stemming tokenizer for Dutch.
    DUTCH = "dutch"
    #: Selects the Lucene Snowball stemming tokenizer for English.
    ENGLISH = "english"
    #: Selects the Lucene Snowball stemming tokenizer for Finnish.
    FINNISH = "finnish"
    #: Selects the Lucene Snowball stemming tokenizer for French.
    FRENCH = "french"
    #: Selects the Lucene Snowball stemming tokenizer for German.
    GERMAN = "german"
    #: Selects the Lucene Snowball stemming tokenizer that uses the German variant algorithm.
    GERMAN2 = "german2"
    #: Selects the Lucene Snowball stemming tokenizer for Hungarian.
    HUNGARIAN = "hungarian"
    #: Selects the Lucene Snowball stemming tokenizer for Italian.
    ITALIAN = "italian"
    #: Selects the Lucene Snowball stemming tokenizer for Dutch that uses the Kraaij-Pohlmann stemming
    #: algorithm.
    KP = "kp"
    #: Selects the Lucene Snowball stemming tokenizer for English that uses the Lovins stemming
    #: algorithm.
    LOVINS = "lovins"
    #: Selects the Lucene Snowball stemming tokenizer for Norwegian.
    NORWEGIAN = "norwegian"
    #: Selects the Lucene Snowball stemming tokenizer for English that uses the Porter stemming
    #: algorithm.
    PORTER = "porter"
    #: Selects the Lucene Snowball stemming tokenizer for Portuguese.
    PORTUGUESE = "portuguese"
    #: Selects the Lucene Snowball stemming tokenizer for Romanian.
    ROMANIAN = "romanian"
    #: Selects the Lucene Snowball stemming tokenizer for Russian.
    RUSSIAN = "russian"
    #: Selects the Lucene Snowball stemming tokenizer for Spanish.
    SPANISH = "spanish"
    #: Selects the Lucene Snowball stemming tokenizer for Swedish.
    SWEDISH = "swedish"
    #: Selects the Lucene Snowball stemming tokenizer for Turkish.
    TURKISH = "turkish"

class SplitSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by SplitSkill.
    """

    #: Danish.
    DA = "da"
    #: German.
    DE = "de"
    #: English.
    EN = "en"
    #: Spanish.
    ES = "es"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: Italian.
    IT = "it"
    #: Korean.
    KO = "ko"
    #: Portuguese.
    PT = "pt"

class StemmerTokenFilterLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language to use for a stemmer token filter.
    """

    #: Selects the Lucene stemming tokenizer for Arabic.
    ARABIC = "arabic"
    #: Selects the Lucene stemming tokenizer for Armenian.
    ARMENIAN = "armenian"
    #: Selects the Lucene stemming tokenizer for Basque.
    BASQUE = "basque"
    #: Selects the Lucene stemming tokenizer for Portuguese (Brazil).
    BRAZILIAN = "brazilian"
    #: Selects the Lucene stemming tokenizer for Bulgarian.
    BULGARIAN = "bulgarian"
    #: Selects the Lucene stemming tokenizer for Catalan.
    CATALAN = "catalan"
    #: Selects the Lucene stemming tokenizer for Czech.
    CZECH = "czech"
    #: Selects the Lucene stemming tokenizer for Danish.
    DANISH = "danish"
    #: Selects the Lucene stemming tokenizer for Dutch.
    DUTCH = "dutch"
    #: Selects the Lucene stemming tokenizer for Dutch that uses the Kraaij-Pohlmann stemming
    #: algorithm.
    DUTCH_KP = "dutchKp"
    #: Selects the Lucene stemming tokenizer for English.
    ENGLISH = "english"
    #: Selects the Lucene stemming tokenizer for English that does light stemming.
    LIGHT_ENGLISH = "lightEnglish"
    #: Selects the Lucene stemming tokenizer for English that does minimal stemming.
    MINIMAL_ENGLISH = "minimalEnglish"
    #: Selects the Lucene stemming tokenizer for English that removes trailing possessives from words.
    POSSESSIVE_ENGLISH = "possessiveEnglish"
    #: Selects the Lucene stemming tokenizer for English that uses the Porter2 stemming algorithm.
    PORTER2 = "porter2"
    #: Selects the Lucene stemming tokenizer for English that uses the Lovins stemming algorithm.
    LOVINS = "lovins"
    #: Selects the Lucene stemming tokenizer for Finnish.
    FINNISH = "finnish"
    #: Selects the Lucene stemming tokenizer for Finnish that does light stemming.
    LIGHT_FINNISH = "lightFinnish"
    #: Selects the Lucene stemming tokenizer for French.
    FRENCH = "french"
    #: Selects the Lucene stemming tokenizer for French that does light stemming.
    LIGHT_FRENCH = "lightFrench"
    #: Selects the Lucene stemming tokenizer for French that does minimal stemming.
    MINIMAL_FRENCH = "minimalFrench"
    #: Selects the Lucene stemming tokenizer for Galician.
    GALICIAN = "galician"
    #: Selects the Lucene stemming tokenizer for Galician that does minimal stemming.
    MINIMAL_GALICIAN = "minimalGalician"
    #: Selects the Lucene stemming tokenizer for German.
    GERMAN = "german"
    #: Selects the Lucene stemming tokenizer that uses the German variant algorithm.
    GERMAN2 = "german2"
    #: Selects the Lucene stemming tokenizer for German that does light stemming.
    LIGHT_GERMAN = "lightGerman"
    #: Selects the Lucene stemming tokenizer for German that does minimal stemming.
    MINIMAL_GERMAN = "minimalGerman"
    #: Selects the Lucene stemming tokenizer for Greek.
    GREEK = "greek"
    #: Selects the Lucene stemming tokenizer for Hindi.
    HINDI = "hindi"
    #: Selects the Lucene stemming tokenizer for Hungarian.
    HUNGARIAN = "hungarian"
    #: Selects the Lucene stemming tokenizer for Hungarian that does light stemming.
    LIGHT_HUNGARIAN = "lightHungarian"
    #: Selects the Lucene stemming tokenizer for Indonesian.
    INDONESIAN = "indonesian"
    #: Selects the Lucene stemming tokenizer for Irish.
    IRISH = "irish"
    #: Selects the Lucene stemming tokenizer for Italian.
    ITALIAN = "italian"
    #: Selects the Lucene stemming tokenizer for Italian that does light stemming.
    LIGHT_ITALIAN = "lightItalian"
    #: Selects the Lucene stemming tokenizer for Sorani.
    SORANI = "sorani"
    #: Selects the Lucene stemming tokenizer for Latvian.
    LATVIAN = "latvian"
    #: Selects the Lucene stemming tokenizer for Norwegian (Bokmål).
    NORWEGIAN = "norwegian"
    #: Selects the Lucene stemming tokenizer for Norwegian (Bokmål) that does light stemming.
    LIGHT_NORWEGIAN = "lightNorwegian"
    #: Selects the Lucene stemming tokenizer for Norwegian (Bokmål) that does minimal stemming.
    MINIMAL_NORWEGIAN = "minimalNorwegian"
    #: Selects the Lucene stemming tokenizer for Norwegian (Nynorsk) that does light stemming.
    LIGHT_NYNORSK = "lightNynorsk"
    #: Selects the Lucene stemming tokenizer for Norwegian (Nynorsk) that does minimal stemming.
    MINIMAL_NYNORSK = "minimalNynorsk"
    #: Selects the Lucene stemming tokenizer for Portuguese.
    PORTUGUESE = "portuguese"
    #: Selects the Lucene stemming tokenizer for Portuguese that does light stemming.
    LIGHT_PORTUGUESE = "lightPortuguese"
    #: Selects the Lucene stemming tokenizer for Portuguese that does minimal stemming.
    MINIMAL_PORTUGUESE = "minimalPortuguese"
    #: Selects the Lucene stemming tokenizer for Portuguese that uses the RSLP stemming algorithm.
    PORTUGUESE_RSLP = "portugueseRslp"
    #: Selects the Lucene stemming tokenizer for Romanian.
    ROMANIAN = "romanian"
    #: Selects the Lucene stemming tokenizer for Russian.
    RUSSIAN = "russian"
    #: Selects the Lucene stemming tokenizer for Russian that does light stemming.
    LIGHT_RUSSIAN = "lightRussian"
    #: Selects the Lucene stemming tokenizer for Spanish.
    SPANISH = "spanish"
    #: Selects the Lucene stemming tokenizer for Spanish that does light stemming.
    LIGHT_SPANISH = "lightSpanish"
    #: Selects the Lucene stemming tokenizer for Swedish.
    SWEDISH = "swedish"
    #: Selects the Lucene stemming tokenizer for Swedish that does light stemming.
    LIGHT_SWEDISH = "lightSwedish"
    #: Selects the Lucene stemming tokenizer for Turkish.
    TURKISH = "turkish"

class StopwordsList(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Identifies a predefined list of language-specific stopwords.
    """

    #: Selects the stopword list for Arabic.
    ARABIC = "arabic"
    #: Selects the stopword list for Armenian.
    ARMENIAN = "armenian"
    #: Selects the stopword list for Basque.
    BASQUE = "basque"
    #: Selects the stopword list for Portuguese (Brazil).
    BRAZILIAN = "brazilian"
    #: Selects the stopword list for Bulgarian.
    BULGARIAN = "bulgarian"
    #: Selects the stopword list for Catalan.
    CATALAN = "catalan"
    #: Selects the stopword list for Czech.
    CZECH = "czech"
    #: Selects the stopword list for Danish.
    DANISH = "danish"
    #: Selects the stopword list for Dutch.
    DUTCH = "dutch"
    #: Selects the stopword list for English.
    ENGLISH = "english"
    #: Selects the stopword list for Finnish.
    FINNISH = "finnish"
    #: Selects the stopword list for French.
    FRENCH = "french"
    #: Selects the stopword list for Galician.
    GALICIAN = "galician"
    #: Selects the stopword list for German.
    GERMAN = "german"
    #: Selects the stopword list for Greek.
    GREEK = "greek"
    #: Selects the stopword list for Hindi.
    HINDI = "hindi"
    #: Selects the stopword list for Hungarian.
    HUNGARIAN = "hungarian"
    #: Selects the stopword list for Indonesian.
    INDONESIAN = "indonesian"
    #: Selects the stopword list for Irish.
    IRISH = "irish"
    #: Selects the stopword list for Italian.
    ITALIAN = "italian"
    #: Selects the stopword list for Latvian.
    LATVIAN = "latvian"
    #: Selects the stopword list for Norwegian.
    NORWEGIAN = "norwegian"
    #: Selects the stopword list for Persian.
    PERSIAN = "persian"
    #: Selects the stopword list for Portuguese.
    PORTUGUESE = "portuguese"
    #: Selects the stopword list for Romanian.
    ROMANIAN = "romanian"
    #: Selects the stopword list for Russian.
    RUSSIAN = "russian"
    #: Selects the stopword list for Sorani.
    SORANI = "sorani"
    #: Selects the stopword list for Spanish.
    SPANISH = "spanish"
    #: Selects the stopword list for Swedish.
    SWEDISH = "swedish"
    #: Selects the stopword list for Thai.
    THAI = "thai"
    #: Selects the stopword list for Turkish.
    TURKISH = "turkish"

class TextSplitMode(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """A value indicating which split mode to perform.
    """

    #: Split the text into individual pages.
    PAGES = "pages"
    #: Split the text into individual sentences.
    SENTENCES = "sentences"

class TextTranslationSkillLanguage(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by TextTranslationSkill.
    """

    #: Afrikaans.
    AF = "af"
    #: Arabic.
    AR = "ar"
    #: Bangla.
    BN = "bn"
    #: Bosnian (Latin).
    BS = "bs"
    #: Bulgarian.
    BG = "bg"
    #: Cantonese (Traditional).
    YUE = "yue"
    #: Catalan.
    CA = "ca"
    #: Chinese Simplified.
    ZH_HANS = "zh-Hans"
    #: Chinese Traditional.
    ZH_HANT = "zh-Hant"
    #: Croatian.
    HR = "hr"
    #: Czech.
    CS = "cs"
    #: Danish.
    DA = "da"
    #: Dutch.
    NL = "nl"
    #: English.
    EN = "en"
    #: Estonian.
    ET = "et"
    #: Fijian.
    FJ = "fj"
    #: Filipino.
    FIL = "fil"
    #: Finnish.
    FI = "fi"
    #: French.
    FR = "fr"
    #: German.
    DE = "de"
    #: Greek.
    EL = "el"
    #: Haitian Creole.
    HT = "ht"
    #: Hebrew.
    HE = "he"
    #: Hindi.
    HI = "hi"
    #: Hmong Daw.
    MWW = "mww"
    #: Hungarian.
    HU = "hu"
    #: Icelandic.
    IS_ENUM = "is"
    #: Indonesian.
    ID = "id"
    #: Italian.
    IT = "it"
    #: Japanese.
    JA = "ja"
    #: Kiswahili.
    SW = "sw"
    #: Klingon.
    TLH = "tlh"
    #: Klingon (Latin script).
    TLH_LATN = "tlh-Latn"
    #: Klingon (Klingon script).
    TLH_PIQD = "tlh-Piqd"
    #: Korean.
    KO = "ko"
    #: Latvian.
    LV = "lv"
    #: Lithuanian.
    LT = "lt"
    #: Malagasy.
    MG = "mg"
    #: Malay.
    MS = "ms"
    #: Maltese.
    MT = "mt"
    #: Norwegian.
    NB = "nb"
    #: Persian.
    FA = "fa"
    #: Polish.
    PL = "pl"
    #: Portuguese.
    PT = "pt"
    #: Portuguese (Brazil).
    PT_BR = "pt-br"
    #: Portuguese (Portugal).
    PT_PT = "pt-PT"
    #: Queretaro Otomi.
    OTQ = "otq"
    #: Romanian.
    RO = "ro"
    #: Russian.
    RU = "ru"
    #: Samoan.
    SM = "sm"
    #: Serbian (Cyrillic).
    SR_CYRL = "sr-Cyrl"
    #: Serbian (Latin).
    SR_LATN = "sr-Latn"
    #: Slovak.
    SK = "sk"
    #: Slovenian.
    SL = "sl"
    #: Spanish.
    ES = "es"
    #: Swedish.
    SV = "sv"
    #: Tahitian.
    TY = "ty"
    #: Tamil.
    TA = "ta"
    #: Telugu.
    TE = "te"
    #: Thai.
    TH = "th"
    #: Tongan.
    TO = "to"
    #: Turkish.
    TR = "tr"
    #: Ukrainian.
    UK = "uk"
    #: Urdu.
    UR = "ur"
    #: Vietnamese.
    VI = "vi"
    #: Welsh.
    CY = "cy"
    #: Yucatec Maya.
    YUA = "yua"
    #: Irish.
    GA = "ga"
    #: Kannada.
    KN = "kn"
    #: Maori.
    MI = "mi"
    #: Malayalam.
    ML = "ml"
    #: Punjabi.
    PA = "pa"

class TokenCharacterKind(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Represents classes of characters on which a token filter can operate.
    """

    #: Keeps letters in tokens.
    LETTER = "letter"
    #: Keeps digits in tokens.
    DIGIT = "digit"
    #: Keeps whitespace in tokens.
    WHITESPACE = "whitespace"
    #: Keeps punctuation in tokens.
    PUNCTUATION = "punctuation"
    #: Keeps symbols in tokens.
    SYMBOL = "symbol"

class TokenFilterName(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all token filters supported by Azure Cognitive Search.
    """

    #: A token filter that applies the Arabic normalizer to normalize the orthography. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.html.
    ARABIC_NORMALIZATION = "arabic_normalization"
    #: Strips all characters after an apostrophe (including the apostrophe itself). See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFilter.html.
    APOSTROPHE = "apostrophe"
    #: Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127
    #: ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if such
    #: equivalents exist. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html.
    ASCII_FOLDING = "asciifolding"
    #: Forms bigrams of CJK terms that are generated from the standard tokenizer. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKBigramFilter.html.
    CJK_BIGRAM = "cjk_bigram"
    #: Normalizes CJK width differences. Folds fullwidth ASCII variants into the equivalent basic
    #: Latin, and half-width Katakana variants into the equivalent Kana. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKWidthFilter.html.
    CJK_WIDTH = "cjk_width"
    #: Removes English possessives, and dots from acronyms. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicFilter.html.
    CLASSIC = "classic"
    #: Construct bigrams for frequently occurring terms while indexing. Single terms are still indexed
    #: too, with bigrams overlaid. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/commongrams/CommonGramsFilter.html.
    COMMON_GRAM = "common_grams"
    #: Generates n-grams of the given size(s) starting from the front or the back of an input token.
    #: See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.html.
    EDGE_N_GRAM = "edgeNGram_v2"
    #: Removes elisions. For example, "l'avion" (the plane) will be converted to "avion" (plane). See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html.
    ELISION = "elision"
    #: Normalizes German characters according to the heuristics of the German2 snowball algorithm. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/de/GermanNormalizationFilter.html.
    GERMAN_NORMALIZATION = "german_normalization"
    #: Normalizes text in Hindi to remove some differences in spelling variations. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/hi/HindiNormalizationFilter.html.
    HINDI_NORMALIZATION = "hindi_normalization"
    #: Normalizes the Unicode representation of text in Indian languages. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/in/IndicNormalizationFilter.html.
    INDIC_NORMALIZATION = "indic_normalization"
    #: Emits each incoming token twice, once as keyword and once as non-keyword. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilter.html.
    KEYWORD_REPEAT = "keyword_repeat"
    #: A high-performance kstem filter for English. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/en/KStemFilter.html.
    K_STEM = "kstem"
    #: Removes words that are too long or too short. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/LengthFilter.html.
    LENGTH = "length"
    #: Limits the number of tokens while indexing. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.html.
    LIMIT = "limit"
    #: Normalizes token text to lower case. See
    #: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html.
    LOWERCASE = "lowercase"
    #: Generates n-grams of the given size(s). See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenFilter.html.
    N_GRAM = "nGram_v2"
    #: Applies normalization for Persian. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizationFilter.html.
    PERSIAN_NORMALIZATION = "persian_normalization"
    #: Create tokens for phonetic matches. See
    #: https://lucene.apache.org/core/4_10_3/analyzers-phonetic/org/apache/lucene/analysis/phonetic/package-tree.html.
    PHONETIC = "phonetic"
    #: Uses the Porter stemming algorithm to transform the token stream. See
    #: http://tartarus.org/~martin/PorterStemmer.
    PORTER_STEM = "porter_stem"
    #: Reverses the token string. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseStringFilter.html.
    REVERSE = "reverse"
    #: Normalizes use of the interchangeable Scandinavian characters. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianNormalizationFilter.html.
    SCANDINAVIAN_NORMALIZATION = "scandinavian_normalization"
    #: Folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o. It also discriminates against use of
    #: double vowels aa, ae, ao, oe and oo, leaving just the first one. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianFoldingFilter.html.
    SCANDINAVIAN_FOLDING_NORMALIZATION = "scandinavian_folding"
    #: Creates combinations of tokens as a single token. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/shingle/ShingleFilter.html.
    SHINGLE = "shingle"
    #: A filter that stems words using a Snowball-generated stemmer. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/snowball/SnowballFilter.html.
    SNOWBALL = "snowball"
    #: Normalizes the Unicode representation of Sorani text. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormalizationFilter.html.
    SORANI_NORMALIZATION = "sorani_normalization"
    #: Language specific stemming filter. See
    #: https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#TokenFilters.
    STEMMER = "stemmer"
    #: Removes stop words from a token stream. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopFilter.html.
    STOPWORDS = "stopwords"
    #: Trims leading and trailing whitespace from tokens. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html.
    TRIM = "trim"
    #: Truncates the terms to a specific length. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.html.
    TRUNCATE = "truncate"
    #: Filters out tokens with same text as the previous token. See
    #: http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.html.
    UNIQUE = "unique"
    #: Normalizes token text to upper case. See
    #: https://lucene.apache.org/core/6_6_1/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html.
    UPPERCASE = "uppercase"
    #: Splits words into subwords and performs optional transformations on subword groups.
    WORD_DELIMITER = "word_delimiter"

class VisualFeature(with_metaclass(CaseInsensitiveEnumMeta, str, Enum)):
    """The strings indicating what visual feature types to return.
    """

    #: Visual features recognized as adult persons.
    ADULT = "adult"
    #: Visual features recognized as commercial brands.
    BRANDS = "brands"
    #: Categories.
    CATEGORIES = "categories"
    #: Description.
    DESCRIPTION = "description"
    #: Visual features recognized as people faces.
    FACES = "faces"
    #: Visual features recognized as objects.
    OBJECTS = "objects"
    #: Tags.
    TAGS = "tags"