# Source code for azure.search.documents.indexes._internal._generated.models._search_service_client_enums

# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# Code generated by Microsoft (R) AutoRest Code Generator.
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
# --------------------------------------------------------------------------

from enum import Enum, EnumMeta
from six import with_metaclass

class _CaseInsensitiveEnumMeta(EnumMeta):
    """Enum metaclass that makes name-based member lookup case-insensitive."""

    def __getitem__(self, name):
        # Member names are upper-case, so normalizing the key makes
        # ``Enum["..."]`` item lookups case-insensitive.
        return super().__getitem__(name.upper())

    def __getattr__(cls, name):
        """Return the enum member matching `name`
        We use __getattr__ instead of descriptors or inserting into the enum
        class' __dict__ in order to support `name` and `value` being both
        properties for enum members (which live in the class' __dict__) and
        enum members themselves.
        """
        try:
            return cls._member_map_[name.upper()]
        except KeyError:
            raise AttributeError(name)
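
# A minimal usage sketch (illustrative only, not part of the generated code):
# both item and attribute lookup on enums built with this metaclass ignore
# casing, provided the underscores of the member name are kept. Lookup by
# *value* goes through the normal Enum machinery and stays case-sensitive.
def _demo_case_insensitive_lookup():
    class _Fruit(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
        RED_APPLE = "redApple"

    assert _Fruit["red_apple"] is _Fruit.RED_APPLE   # item lookup, any casing
    assert _Fruit.red_apple is _Fruit.RED_APPLE      # attribute lookup, any casing
    assert _Fruit("redApple") is _Fruit.RED_APPLE    # value lookup, exact casing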


class BlobIndexerDataToExtract(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Specifies the data to extract from Azure blob storage and tells the indexer which data to
    extract from image content when "imageAction" is set to a value other than "none".  This
    applies to embedded image content in a .PDF or other application, or image files such as .jpg
    and .png, in Azure blobs.
    """

    STORAGE_METADATA = "storageMetadata"  #: Indexes just the standard blob properties and user-specified metadata.
    ALL_METADATA = "allMetadata"  #: Extracts metadata provided by the Azure blob storage subsystem and the content-type specific metadata (for example, metadata unique to just .png files are indexed).
    CONTENT_AND_METADATA = "contentAndMetadata"  #: Extracts all metadata and textual content from each blob.

class BlobIndexerImageAction(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Determines how to process embedded images and image files in Azure blob storage.  Setting the
    "imageAction" configuration to any value other than "none" requires that a skillset also be
    attached to that indexer.
    """

    NONE = "none"  #: Ignores embedded images or image files in the data set.  This is the default.
    GENERATE_NORMALIZED_IMAGES = "generateNormalizedImages"  #: Extracts text from images (for example, the word "STOP" from a traffic stop sign), and embeds it into the content field.  This action requires that "dataToExtract" is set to "contentAndMetadata".  A normalized image refers to additional processing resulting in uniform image output, sized and rotated to promote consistent rendering when you include images in visual search results. This information is generated for each image when you use this option.
    GENERATE_NORMALIZED_IMAGE_PER_PAGE = "generateNormalizedImagePerPage"  #: Extracts text from images (for example, the word "STOP" from a traffic stop sign), and embeds it into the content field, but treats PDF files differently in that each page will be rendered as an image and normalized accordingly, instead of extracting embedded images.  Non-PDF file types will be treated the same as if "generateNormalizedImages" was set.
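
# Illustrative sketch only: per the docstrings above, "generateNormalizedImages"
# requires "dataToExtract" to be "contentAndMetadata". Assuming the usual REST
# indexer shape where both knobs live in a configuration mapping (an assumption
# about the service contract, not something this module defines), a consistent
# pairing looks like:
def _example_image_configuration():
    return {
        "imageAction": BlobIndexerImageAction.GENERATE_NORMALIZED_IMAGES,
        "dataToExtract": BlobIndexerDataToExtract.CONTENT_AND_METADATA,
    }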

class BlobIndexerParsingMode(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the parsing mode for indexing from an Azure blob data source.
    """

    DEFAULT = "default"  #: Set to default for normal file processing.
    TEXT = "text"  #: Set to text to improve indexing performance on plain text files in blob storage.
    DELIMITED_TEXT = "delimitedText"  #: Set to delimitedText when blobs are plain CSV files.
    JSON = "json"  #: Set to json to extract structured content from JSON files.
    JSON_ARRAY = "jsonArray"  #: Set to jsonArray to extract individual elements of a JSON array as separate documents in Azure Cognitive Search.
    JSON_LINES = "jsonLines"  #: Set to jsonLines to extract individual JSON entities, separated by a new line, as separate documents in Azure Cognitive Search.

class BlobIndexerPDFTextRotationAlgorithm(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Determines algorithm for text extraction from PDF files in Azure blob storage.
    """

    NONE = "none"  #: Leverages normal text extraction.  This is the default.
    DETECT_ANGLES = "detectAngles"  #: May produce better and more readable text extraction from PDF files that have rotated text within them.  Note that there may be a small performance impact when this parameter is used.  This parameter only applies to PDF files, and only to PDFs with embedded text.  If the rotated text appears within an embedded image in the PDF, this parameter does not apply.

class CharFilterName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all character filters supported by Azure Cognitive Search.
    """

    HTML_STRIP = "html_strip"  #: A character filter that attempts to strip out HTML constructs. See https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.html.

class CjkBigramTokenFilterScripts(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Scripts that can be ignored by CjkBigramTokenFilter.
    """

    HAN = "han"  #: Ignore Han script when forming bigrams of CJK terms.
    HIRAGANA = "hiragana"  #: Ignore Hiragana script when forming bigrams of CJK terms.
    KATAKANA = "katakana"  #: Ignore Katakana script when forming bigrams of CJK terms.
    HANGUL = "hangul"  #: Ignore Hangul script when forming bigrams of CJK terms.


class EdgeNGramTokenFilterSide(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Specifies which side of the input an n-gram should be generated from.
    """

    FRONT = "front"  #: Specifies that the n-gram should be generated from the front of the input.
    BACK = "back"  #: Specifies that the n-gram should be generated from the back of the input.

class EntityCategory(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """A string indicating what entity categories to return.
    """

    LOCATION = "location"  #: Entities describing a physical location.
    ORGANIZATION = "organization"  #: Entities describing an organization.
    PERSON = "person"  #: Entities describing a person.
    QUANTITY = "quantity"  #: Entities describing a quantity.
    DATETIME = "datetime"  #: Entities describing a date and time.
    URL = "url"  #: Entities describing a URL.
    EMAIL = "email"  #: Entities describing an email address.

class EntityRecognitionSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by EntityRecognitionSkill.
    """

    AR = "ar"  #: Arabic.
    CS = "cs"  #: Czech.
    ZH_HANS = "zh-Hans"  #: Chinese-Simplified.
    ZH_HANT = "zh-Hant"  #: Chinese-Traditional.
    DA = "da"  #: Danish.
    NL = "nl"  #: Dutch.
    EN = "en"  #: English.
    FI = "fi"  #: Finnish.
    FR = "fr"  #: French.
    DE = "de"  #: German.
    EL = "el"  #: Greek.
    HU = "hu"  #: Hungarian.
    IT = "it"  #: Italian.
    JA = "ja"  #: Japanese.
    KO = "ko"  #: Korean.
    NO = "no"  #: Norwegian (Bokmaal).
    PL = "pl"  #: Polish.
    PT_PT = "pt-PT"  #: Portuguese (Portugal).
    PT_BR = "pt-BR"  #: Portuguese (Brazil).
    RU = "ru"  #: Russian.
    ES = "es"  #: Spanish.
    SV = "sv"  #: Swedish.
    TR = "tr"  #: Turkish.

class ImageAnalysisSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input by ImageAnalysisSkill.
    """

    EN = "en"  #: English.
    ES = "es"  #: Spanish.
    JA = "ja"  #: Japanese.
    PT = "pt"  #: Portuguese.
    ZH = "zh"  #: Chinese.

class ImageDetail(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """A string indicating which domain-specific details to return.
    """

    CELEBRITIES = "celebrities"  #: Details recognized as celebrities.
    LANDMARKS = "landmarks"  #: Details recognized as landmarks.

class IndexerExecutionEnvironment(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Specifies the environment in which the indexer should execute.
    """

    STANDARD = "standard"  #: Indicates that Azure Cognitive Search can determine where the indexer should execute. This is the default environment when nothing is specified and is the recommended value.
    PRIVATE = "private"  #: Indicates that the indexer should run with the environment provisioned specifically for the search service. This should only be specified as the execution environment if the indexer needs to access resources securely over shared private link resources.

class IndexerExecutionStatus(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the status of an individual indexer execution.
    """

    TRANSIENT_FAILURE = "transientFailure"  #: An indexer invocation has failed, but the failure may be transient. Indexer invocations will continue per schedule.
    SUCCESS = "success"  #: Indexer execution completed successfully.
    IN_PROGRESS = "inProgress"  #: Indexer execution is in progress.
    RESET = "reset"  #: Indexer has been reset.

class IndexerStatus(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Represents the overall indexer status.
    """

    UNKNOWN = "unknown"  #: Indicates that the indexer is in an unknown state.
    ERROR = "error"  #: Indicates that the indexer experienced an error that cannot be corrected without human intervention.
    RUNNING = "running"  #: Indicates that the indexer is running normally.

class KeyPhraseExtractionSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by KeyPhraseExtractionSkill.
    """

    DA = "da"  #: Danish.
    NL = "nl"  #: Dutch.
    EN = "en"  #: English.
    FI = "fi"  #: Finnish.
    FR = "fr"  #: French.
    DE = "de"  #: German.
    IT = "it"  #: Italian.
    JA = "ja"  #: Japanese.
    KO = "ko"  #: Korean.
    NO = "no"  #: Norwegian (Bokmaal).
    PL = "pl"  #: Polish.
    PT_PT = "pt-PT"  #: Portuguese (Portugal).
    PT_BR = "pt-BR"  #: Portuguese (Brazil).
    RU = "ru"  #: Russian.
    ES = "es"  #: Spanish.
    SV = "sv"  #: Swedish.

class LexicalAnalyzerName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all text analyzers supported by Azure Cognitive Search.
    """

    AR_MICROSOFT = "ar.microsoft"  #: Microsoft analyzer for Arabic.
    AR_LUCENE = "ar.lucene"  #: Lucene analyzer for Arabic.
    HY_LUCENE = "hy.lucene"  #: Lucene analyzer for Armenian.
    BN_MICROSOFT = "bn.microsoft"  #: Microsoft analyzer for Bangla.
    EU_LUCENE = "eu.lucene"  #: Lucene analyzer for Basque.
    BG_MICROSOFT = "bg.microsoft"  #: Microsoft analyzer for Bulgarian.
    BG_LUCENE = "bg.lucene"  #: Lucene analyzer for Bulgarian.
    CA_MICROSOFT = "ca.microsoft"  #: Microsoft analyzer for Catalan.
    CA_LUCENE = "ca.lucene"  #: Lucene analyzer for Catalan.
    ZH_HANS_MICROSOFT = "zh-Hans.microsoft"  #: Microsoft analyzer for Chinese (Simplified).
    ZH_HANS_LUCENE = "zh-Hans.lucene"  #: Lucene analyzer for Chinese (Simplified).
    ZH_HANT_MICROSOFT = "zh-Hant.microsoft"  #: Microsoft analyzer for Chinese (Traditional).
    ZH_HANT_LUCENE = "zh-Hant.lucene"  #: Lucene analyzer for Chinese (Traditional).
    HR_MICROSOFT = "hr.microsoft"  #: Microsoft analyzer for Croatian.
    CS_MICROSOFT = "cs.microsoft"  #: Microsoft analyzer for Czech.
    CS_LUCENE = "cs.lucene"  #: Lucene analyzer for Czech.
    DA_MICROSOFT = "da.microsoft"  #: Microsoft analyzer for Danish.
    DA_LUCENE = "da.lucene"  #: Lucene analyzer for Danish.
    NL_MICROSOFT = "nl.microsoft"  #: Microsoft analyzer for Dutch.
    NL_LUCENE = "nl.lucene"  #: Lucene analyzer for Dutch.
    EN_MICROSOFT = "en.microsoft"  #: Microsoft analyzer for English.
    EN_LUCENE = "en.lucene"  #: Lucene analyzer for English.
    ET_MICROSOFT = "et.microsoft"  #: Microsoft analyzer for Estonian.
    FI_MICROSOFT = "fi.microsoft"  #: Microsoft analyzer for Finnish.
    FI_LUCENE = "fi.lucene"  #: Lucene analyzer for Finnish.
    FR_MICROSOFT = "fr.microsoft"  #: Microsoft analyzer for French.
    FR_LUCENE = "fr.lucene"  #: Lucene analyzer for French.
    GL_LUCENE = "gl.lucene"  #: Lucene analyzer for Galician.
    DE_MICROSOFT = "de.microsoft"  #: Microsoft analyzer for German.
    DE_LUCENE = "de.lucene"  #: Lucene analyzer for German.
    EL_MICROSOFT = "el.microsoft"  #: Microsoft analyzer for Greek.
    EL_LUCENE = "el.lucene"  #: Lucene analyzer for Greek.
    GU_MICROSOFT = "gu.microsoft"  #: Microsoft analyzer for Gujarati.
    HE_MICROSOFT = "he.microsoft"  #: Microsoft analyzer for Hebrew.
    HI_MICROSOFT = "hi.microsoft"  #: Microsoft analyzer for Hindi.
    HI_LUCENE = "hi.lucene"  #: Lucene analyzer for Hindi.
    HU_MICROSOFT = "hu.microsoft"  #: Microsoft analyzer for Hungarian.
    HU_LUCENE = "hu.lucene"  #: Lucene analyzer for Hungarian.
    IS_MICROSOFT = "is.microsoft"  #: Microsoft analyzer for Icelandic.
    ID_MICROSOFT = "id.microsoft"  #: Microsoft analyzer for Indonesian (Bahasa).
    ID_LUCENE = "id.lucene"  #: Lucene analyzer for Indonesian.
    GA_LUCENE = "ga.lucene"  #: Lucene analyzer for Irish.
    IT_MICROSOFT = "it.microsoft"  #: Microsoft analyzer for Italian.
    IT_LUCENE = "it.lucene"  #: Lucene analyzer for Italian.
    JA_MICROSOFT = "ja.microsoft"  #: Microsoft analyzer for Japanese.
    JA_LUCENE = "ja.lucene"  #: Lucene analyzer for Japanese.
    KN_MICROSOFT = "kn.microsoft"  #: Microsoft analyzer for Kannada.
    KO_MICROSOFT = "ko.microsoft"  #: Microsoft analyzer for Korean.
    KO_LUCENE = "ko.lucene"  #: Lucene analyzer for Korean.
    LV_MICROSOFT = "lv.microsoft"  #: Microsoft analyzer for Latvian.
    LV_LUCENE = "lv.lucene"  #: Lucene analyzer for Latvian.
    LT_MICROSOFT = "lt.microsoft"  #: Microsoft analyzer for Lithuanian.
    ML_MICROSOFT = "ml.microsoft"  #: Microsoft analyzer for Malayalam.
    MS_MICROSOFT = "ms.microsoft"  #: Microsoft analyzer for Malay (Latin).
    MR_MICROSOFT = "mr.microsoft"  #: Microsoft analyzer for Marathi.
    NB_MICROSOFT = "nb.microsoft"  #: Microsoft analyzer for Norwegian (Bokmål).
    NO_LUCENE = "no.lucene"  #: Lucene analyzer for Norwegian.
    FA_LUCENE = "fa.lucene"  #: Lucene analyzer for Persian.
    PL_MICROSOFT = "pl.microsoft"  #: Microsoft analyzer for Polish.
    PL_LUCENE = "pl.lucene"  #: Lucene analyzer for Polish.
    PT_BR_MICROSOFT = "pt-BR.microsoft"  #: Microsoft analyzer for Portuguese (Brazil).
    PT_BR_LUCENE = "pt-BR.lucene"  #: Lucene analyzer for Portuguese (Brazil).
    PT_PT_MICROSOFT = "pt-PT.microsoft"  #: Microsoft analyzer for Portuguese (Portugal).
    PT_PT_LUCENE = "pt-PT.lucene"  #: Lucene analyzer for Portuguese (Portugal).
    PA_MICROSOFT = "pa.microsoft"  #: Microsoft analyzer for Punjabi.
    RO_MICROSOFT = "ro.microsoft"  #: Microsoft analyzer for Romanian.
    RO_LUCENE = "ro.lucene"  #: Lucene analyzer for Romanian.
    RU_MICROSOFT = "ru.microsoft"  #: Microsoft analyzer for Russian.
    RU_LUCENE = "ru.lucene"  #: Lucene analyzer for Russian.
    SR_CYRILLIC_MICROSOFT = "sr-cyrillic.microsoft"  #: Microsoft analyzer for Serbian (Cyrillic).
    SR_LATIN_MICROSOFT = "sr-latin.microsoft"  #: Microsoft analyzer for Serbian (Latin).
    SK_MICROSOFT = "sk.microsoft"  #: Microsoft analyzer for Slovak.
    SL_MICROSOFT = "sl.microsoft"  #: Microsoft analyzer for Slovenian.
    ES_MICROSOFT = "es.microsoft"  #: Microsoft analyzer for Spanish.
    ES_LUCENE = "es.lucene"  #: Lucene analyzer for Spanish.
    SV_MICROSOFT = "sv.microsoft"  #: Microsoft analyzer for Swedish.
    SV_LUCENE = "sv.lucene"  #: Lucene analyzer for Swedish.
    TA_MICROSOFT = "ta.microsoft"  #: Microsoft analyzer for Tamil.
    TE_MICROSOFT = "te.microsoft"  #: Microsoft analyzer for Telugu.
    TH_MICROSOFT = "th.microsoft"  #: Microsoft analyzer for Thai.
    TH_LUCENE = "th.lucene"  #: Lucene analyzer for Thai.
    TR_MICROSOFT = "tr.microsoft"  #: Microsoft analyzer for Turkish.
    TR_LUCENE = "tr.lucene"  #: Lucene analyzer for Turkish.
    UK_MICROSOFT = "uk.microsoft"  #: Microsoft analyzer for Ukrainian.
    UR_MICROSOFT = "ur.microsoft"  #: Microsoft analyzer for Urdu.
    VI_MICROSOFT = "vi.microsoft"  #: Microsoft analyzer for Vietnamese.
    STANDARD_LUCENE = "standard.lucene"  #: Standard Lucene analyzer.
    STANDARD_ASCII_FOLDING_LUCENE = "standardasciifolding.lucene"  #: Standard ASCII Folding Lucene analyzer. See https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#Analyzers.
    KEYWORD = "keyword"  #: Treats the entire content of a field as a single token. This is useful for data like zip codes, ids, and some product names. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordAnalyzer.html.
    PATTERN = "pattern"  #: Flexibly separates text into terms via a regular expression pattern. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.html.
    SIMPLE = "simple"  #: Divides text at non-letters and converts them to lower case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/SimpleAnalyzer.html.
    STOP = "stop"  #: Divides text at non-letters; Applies the lowercase and stopword token filters. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopAnalyzer.html.
    WHITESPACE = "whitespace"  #: An analyzer that uses the whitespace tokenizer. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceAnalyzer.html.

class LexicalTokenizerName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all tokenizers supported by Azure Cognitive Search.
    """

    CLASSIC = "classic"  #: Grammar-based tokenizer that is suitable for processing most European-language documents. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicTokenizer.html.
    EDGE_N_GRAM = "edgeNGram"  #: Tokenizes the input from an edge into n-grams of the given size(s). See https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.html.
    KEYWORD = "keyword_v2"  #: Emits the entire input as a single token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/KeywordTokenizer.html.
    LETTER = "letter"  #: Divides text at non-letters. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LetterTokenizer.html.
    LOWERCASE = "lowercase"  #: Divides text at non-letters and converts them to lower case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseTokenizer.html.
    MICROSOFT_LANGUAGE_TOKENIZER = "microsoft_language_tokenizer"  #: Divides text using language-specific rules.
    MICROSOFT_LANGUAGE_STEMMING_TOKENIZER = "microsoft_language_stemming_tokenizer"  #: Divides text using language-specific rules and reduces words to their base forms.
    N_GRAM = "nGram"  #: Tokenizes the input into n-grams of the given size(s). See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenizer.html.
    PATH_HIERARCHY = "path_hierarchy_v2"  #: Tokenizer for path-like hierarchies. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/path/PathHierarchyTokenizer.html.
    PATTERN = "pattern"  #: Tokenizer that uses regex pattern matching to construct distinct tokens. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternTokenizer.html.
    STANDARD = "standard_v2"  #: Standard Lucene analyzer; Composed of the standard tokenizer, lowercase filter and stop filter. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/StandardTokenizer.html.
    UAX_URL_EMAIL = "uax_url_email"  #: Tokenizes urls and emails as one token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.html.
    WHITESPACE = "whitespace"  #: Divides text at whitespace. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/WhitespaceTokenizer.html.

class MicrosoftStemmingTokenizerLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Lists the languages supported by the Microsoft language stemming tokenizer.
    """

    ARABIC = "arabic"  #: Selects the Microsoft stemming tokenizer for Arabic.
    BANGLA = "bangla"  #: Selects the Microsoft stemming tokenizer for Bangla.
    BULGARIAN = "bulgarian"  #: Selects the Microsoft stemming tokenizer for Bulgarian.
    CATALAN = "catalan"  #: Selects the Microsoft stemming tokenizer for Catalan.
    CROATIAN = "croatian"  #: Selects the Microsoft stemming tokenizer for Croatian.
    CZECH = "czech"  #: Selects the Microsoft stemming tokenizer for Czech.
    DANISH = "danish"  #: Selects the Microsoft stemming tokenizer for Danish.
    DUTCH = "dutch"  #: Selects the Microsoft stemming tokenizer for Dutch.
    ENGLISH = "english"  #: Selects the Microsoft stemming tokenizer for English.
    ESTONIAN = "estonian"  #: Selects the Microsoft stemming tokenizer for Estonian.
    FINNISH = "finnish"  #: Selects the Microsoft stemming tokenizer for Finnish.
    FRENCH = "french"  #: Selects the Microsoft stemming tokenizer for French.
    GERMAN = "german"  #: Selects the Microsoft stemming tokenizer for German.
    GREEK = "greek"  #: Selects the Microsoft stemming tokenizer for Greek.
    GUJARATI = "gujarati"  #: Selects the Microsoft stemming tokenizer for Gujarati.
    HEBREW = "hebrew"  #: Selects the Microsoft stemming tokenizer for Hebrew.
    HINDI = "hindi"  #: Selects the Microsoft stemming tokenizer for Hindi.
    HUNGARIAN = "hungarian"  #: Selects the Microsoft stemming tokenizer for Hungarian.
    ICELANDIC = "icelandic"  #: Selects the Microsoft stemming tokenizer for Icelandic.
    INDONESIAN = "indonesian"  #: Selects the Microsoft stemming tokenizer for Indonesian.
    ITALIAN = "italian"  #: Selects the Microsoft stemming tokenizer for Italian.
    KANNADA = "kannada"  #: Selects the Microsoft stemming tokenizer for Kannada.
    LATVIAN = "latvian"  #: Selects the Microsoft stemming tokenizer for Latvian.
    LITHUANIAN = "lithuanian"  #: Selects the Microsoft stemming tokenizer for Lithuanian.
    MALAY = "malay"  #: Selects the Microsoft stemming tokenizer for Malay.
    MALAYALAM = "malayalam"  #: Selects the Microsoft stemming tokenizer for Malayalam.
    MARATHI = "marathi"  #: Selects the Microsoft stemming tokenizer for Marathi.
    NORWEGIAN_BOKMAAL = "norwegianBokmaal"  #: Selects the Microsoft stemming tokenizer for Norwegian (Bokmål).
    POLISH = "polish"  #: Selects the Microsoft stemming tokenizer for Polish.
    PORTUGUESE = "portuguese"  #: Selects the Microsoft stemming tokenizer for Portuguese.
    PORTUGUESE_BRAZILIAN = "portugueseBrazilian"  #: Selects the Microsoft stemming tokenizer for Portuguese (Brazil).
    PUNJABI = "punjabi"  #: Selects the Microsoft stemming tokenizer for Punjabi.
    ROMANIAN = "romanian"  #: Selects the Microsoft stemming tokenizer for Romanian.
    RUSSIAN = "russian"  #: Selects the Microsoft stemming tokenizer for Russian.
    SERBIAN_CYRILLIC = "serbianCyrillic"  #: Selects the Microsoft stemming tokenizer for Serbian (Cyrillic).
    SERBIAN_LATIN = "serbianLatin"  #: Selects the Microsoft stemming tokenizer for Serbian (Latin).
    SLOVAK = "slovak"  #: Selects the Microsoft stemming tokenizer for Slovak.
    SLOVENIAN = "slovenian"  #: Selects the Microsoft stemming tokenizer for Slovenian.
    SPANISH = "spanish"  #: Selects the Microsoft stemming tokenizer for Spanish.
    SWEDISH = "swedish"  #: Selects the Microsoft stemming tokenizer for Swedish.
    TAMIL = "tamil"  #: Selects the Microsoft stemming tokenizer for Tamil.
    TELUGU = "telugu"  #: Selects the Microsoft stemming tokenizer for Telugu.
    TURKISH = "turkish"  #: Selects the Microsoft stemming tokenizer for Turkish.
    UKRAINIAN = "ukrainian"  #: Selects the Microsoft stemming tokenizer for Ukrainian.
    URDU = "urdu"  #: Selects the Microsoft stemming tokenizer for Urdu.

class MicrosoftTokenizerLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Lists the languages supported by the Microsoft language tokenizer.
    """

    BANGLA = "bangla"  #: Selects the Microsoft tokenizer for Bangla.
    BULGARIAN = "bulgarian"  #: Selects the Microsoft tokenizer for Bulgarian.
    CATALAN = "catalan"  #: Selects the Microsoft tokenizer for Catalan.
    CHINESE_SIMPLIFIED = "chineseSimplified"  #: Selects the Microsoft tokenizer for Chinese (Simplified).
    CHINESE_TRADITIONAL = "chineseTraditional"  #: Selects the Microsoft tokenizer for Chinese (Traditional).
    CROATIAN = "croatian"  #: Selects the Microsoft tokenizer for Croatian.
    CZECH = "czech"  #: Selects the Microsoft tokenizer for Czech.
    DANISH = "danish"  #: Selects the Microsoft tokenizer for Danish.
    DUTCH = "dutch"  #: Selects the Microsoft tokenizer for Dutch.
    ENGLISH = "english"  #: Selects the Microsoft tokenizer for English.
    FRENCH = "french"  #: Selects the Microsoft tokenizer for French.
    GERMAN = "german"  #: Selects the Microsoft tokenizer for German.
    GREEK = "greek"  #: Selects the Microsoft tokenizer for Greek.
    GUJARATI = "gujarati"  #: Selects the Microsoft tokenizer for Gujarati.
    HINDI = "hindi"  #: Selects the Microsoft tokenizer for Hindi.
    ICELANDIC = "icelandic"  #: Selects the Microsoft tokenizer for Icelandic.
    INDONESIAN = "indonesian"  #: Selects the Microsoft tokenizer for Indonesian.
    ITALIAN = "italian"  #: Selects the Microsoft tokenizer for Italian.
    JAPANESE = "japanese"  #: Selects the Microsoft tokenizer for Japanese.
    KANNADA = "kannada"  #: Selects the Microsoft tokenizer for Kannada.
    KOREAN = "korean"  #: Selects the Microsoft tokenizer for Korean.
    MALAY = "malay"  #: Selects the Microsoft tokenizer for Malay.
    MALAYALAM = "malayalam"  #: Selects the Microsoft tokenizer for Malayalam.
    MARATHI = "marathi"  #: Selects the Microsoft tokenizer for Marathi.
    NORWEGIAN_BOKMAAL = "norwegianBokmaal"  #: Selects the Microsoft tokenizer for Norwegian (Bokmål).
    POLISH = "polish"  #: Selects the Microsoft tokenizer for Polish.
    PORTUGUESE = "portuguese"  #: Selects the Microsoft tokenizer for Portuguese.
    PORTUGUESE_BRAZILIAN = "portugueseBrazilian"  #: Selects the Microsoft tokenizer for Portuguese (Brazil).
    PUNJABI = "punjabi"  #: Selects the Microsoft tokenizer for Punjabi.
    ROMANIAN = "romanian"  #: Selects the Microsoft tokenizer for Romanian.
    RUSSIAN = "russian"  #: Selects the Microsoft tokenizer for Russian.
    SERBIAN_CYRILLIC = "serbianCyrillic"  #: Selects the Microsoft tokenizer for Serbian (Cyrillic).
    SERBIAN_LATIN = "serbianLatin"  #: Selects the Microsoft tokenizer for Serbian (Latin).
    SLOVENIAN = "slovenian"  #: Selects the Microsoft tokenizer for Slovenian.
    SPANISH = "spanish"  #: Selects the Microsoft tokenizer for Spanish.
    SWEDISH = "swedish"  #: Selects the Microsoft tokenizer for Swedish.
    TAMIL = "tamil"  #: Selects the Microsoft tokenizer for Tamil.
    TELUGU = "telugu"  #: Selects the Microsoft tokenizer for Telugu.
    THAI = "thai"  #: Selects the Microsoft tokenizer for Thai.
    UKRAINIAN = "ukrainian"  #: Selects the Microsoft tokenizer for Ukrainian.
    URDU = "urdu"  #: Selects the Microsoft tokenizer for Urdu.
    VIETNAMESE = "vietnamese"  #: Selects the Microsoft tokenizer for Vietnamese.

class OcrSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input by OcrSkill.
    """

    ZH_HANS = "zh-Hans"  #: Chinese-Simplified.
    ZH_HANT = "zh-Hant"  #: Chinese-Traditional.
    CS = "cs"  #: Czech.
    DA = "da"  #: Danish.
    NL = "nl"  #: Dutch.
    EN = "en"  #: English.
    FI = "fi"  #: Finnish.
    FR = "fr"  #: French.
    DE = "de"  #: German.
    EL = "el"  #: Greek.
    HU = "hu"  #: Hungarian.
    IT = "it"  #: Italian.
    JA = "ja"  #: Japanese.
    KO = "ko"  #: Korean.
    NB = "nb"  #: Norwegian (Bokmaal).
    PL = "pl"  #: Polish.
    PT = "pt"  #: Portuguese.
    RU = "ru"  #: Russian.
    ES = "es"  #: Spanish.
    SV = "sv"  #: Swedish.
    TR = "tr"  #: Turkish.
    AR = "ar"  #: Arabic.
    RO = "ro"  #: Romanian.
    SR_CYRL = "sr-Cyrl"  #: Serbian (Cyrillic, Serbia).
    SR_LATN = "sr-Latn"  #: Serbian (Latin, Serbia).
    SK = "sk"  #: Slovak.

class PhoneticEncoder(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Identifies the type of phonetic encoder to use with a PhoneticTokenFilter.
    """

    METAPHONE = "metaphone"  #: Encodes a token into a Metaphone value.
    DOUBLE_METAPHONE = "doubleMetaphone"  #: Encodes a token into a double metaphone value.
    SOUNDEX = "soundex"  #: Encodes a token into a Soundex value.
    REFINED_SOUNDEX = "refinedSoundex"  #: Encodes a token into a Refined Soundex value.
    CAVERPHONE1 = "caverphone1"  #: Encodes a token into a Caverphone 1.0 value.
    CAVERPHONE2 = "caverphone2"  #: Encodes a token into a Caverphone 2.0 value.
    COLOGNE = "cologne"  #: Encodes a token into a Cologne Phonetic value.
    NYSIIS = "nysiis"  #: Encodes a token into a NYSIIS value.
    KOELNER_PHONETIK = "koelnerPhonetik"  #: Encodes a token using the Kölner Phonetik algorithm.
    HAASE_PHONETIK = "haasePhonetik"  #: Encodes a token using the Haase refinement of the Kölner Phonetik algorithm.
    BEIDER_MORSE = "beiderMorse"  #: Encodes a token into a Beider-Morse value.

class RegexFlags(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines flags that can be combined to control how regular expressions are used in the pattern
    analyzer and pattern tokenizer.
    """

    CANON_EQ = "CANON_EQ"  #: Enables canonical equivalence.
    CASE_INSENSITIVE = "CASE_INSENSITIVE"  #: Enables case-insensitive matching.
    COMMENTS = "COMMENTS"  #: Permits whitespace and comments in the pattern.
    DOT_ALL = "DOTALL"  #: Enables dotall mode.
    LITERAL = "LITERAL"  #: Enables literal parsing of the pattern.
    MULTILINE = "MULTILINE"  #: Enables multiline mode.
    UNICODE_CASE = "UNICODE_CASE"  #: Enables Unicode-aware case folding.
    UNIX_LINES = "UNIX_LINES"  #: Enables Unix lines mode.
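
# Sketch: the docstring says these flags "can be combined"; since members are
# str-valued, a plain join produces the combined flag expression. The "|"
# separator mirrors the Java regex convention these flags come from (stated
# here as an assumption, not something this module enforces).
def _example_combined_regex_flags():
    return "|".join([RegexFlags.CASE_INSENSITIVE, RegexFlags.MULTILINE])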

class ScoringFunctionAggregation(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the aggregation function used to combine the results of all the scoring functions in a
    scoring profile.
    """

    SUM = "sum"  #: Boost scores by the sum of all scoring function results.
    AVERAGE = "average"  #: Boost scores by the average of all scoring function results.
    MINIMUM = "minimum"  #: Boost scores by the minimum of all scoring function results.
    MAXIMUM = "maximum"  #: Boost scores by the maximum of all scoring function results.
    FIRST_MATCHING = "firstMatching"  #: Boost scores using the first applicable scoring function in the scoring profile.

class ScoringFunctionInterpolation(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the function used to interpolate score boosting across a range of documents.
    """

    LINEAR = "linear"  #: Boosts scores by a linearly decreasing amount. This is the default interpolation for scoring functions.
    CONSTANT = "constant"  #: Boosts scores by a constant factor.
    QUADRATIC = "quadratic"  #: Boosts scores by an amount that decreases quadratically. Boosts decrease slowly for higher scores, and more quickly as the scores decrease. This interpolation option is not allowed in tag scoring functions.
    LOGARITHMIC = "logarithmic"  #: Boosts scores by an amount that decreases logarithmically. Boosts decrease quickly for higher scores, and more slowly as the scores decrease. This interpolation option is not allowed in tag scoring functions.

class SearchFieldDataType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the data type of a field in a search index.
    """

    STRING = "Edm.String"  #: Indicates that a field contains a string.
    INT32 = "Edm.Int32"  #: Indicates that a field contains a 32-bit signed integer.
    INT64 = "Edm.Int64"  #: Indicates that a field contains a 64-bit signed integer.
    DOUBLE = "Edm.Double"  #: Indicates that a field contains an IEEE double-precision floating point number.
    BOOLEAN = "Edm.Boolean"  #: Indicates that a field contains a Boolean value (true or false).
    DATE_TIME_OFFSET = "Edm.DateTimeOffset"  #: Indicates that a field contains a date/time value, including timezone information.
    GEOGRAPHY_POINT = "Edm.GeographyPoint"  #: Indicates that a field contains a geo-location in terms of longitude and latitude.
    COMPLEX = "Edm.ComplexType"  #: Indicates that a field contains one or more complex objects that in turn have sub-fields of other types.

class SearchIndexerDataSourceType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the type of a datasource.
    """

    AZURE_SQL = "azuresql"  #: Indicates an Azure SQL datasource.
    COSMOS_DB = "cosmosdb"  #: Indicates a CosmosDB datasource.
    AZURE_BLOB = "azureblob"  #: Indicates an Azure Blob datasource.
    AZURE_TABLE = "azuretable"  #: Indicates an Azure Table datasource.
    MY_SQL = "mysql"  #: Indicates a MySql datasource.
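
# SearchFieldDataType above lists only the primitive EDM types. In the REST
# API, collection fields are expressed by wrapping a primitive in
# "Collection(...)". A hypothetical helper sketching that convention (this
# helper is not part of the generated module):
def _collection_of(item_type):
    # e.g. _collection_of(SearchFieldDataType.STRING) -> "Collection(Edm.String)"
    return "Collection({})".format(item_type)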

class SentimentSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by SentimentSkill.
    """

    DA = "da"  #: Danish.
    NL = "nl"  #: Dutch.
    EN = "en"  #: English.
    FI = "fi"  #: Finnish.
    FR = "fr"  #: French.
    DE = "de"  #: German.
    EL = "el"  #: Greek.
    IT = "it"  #: Italian.
    NO = "no"  #: Norwegian (Bokmaal).
    PL = "pl"  #: Polish.
    PT_PT = "pt-PT"  #: Portuguese (Portugal).
    RU = "ru"  #: Russian.
    ES = "es"  #: Spanish.
    SV = "sv"  #: Swedish.
    TR = "tr"  #: Turkish.

class SnowballTokenFilterLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language to use for a Snowball token filter.
    """

    ARMENIAN = "armenian"  #: Selects the Lucene Snowball stemming tokenizer for Armenian.
    BASQUE = "basque"  #: Selects the Lucene Snowball stemming tokenizer for Basque.
    CATALAN = "catalan"  #: Selects the Lucene Snowball stemming tokenizer for Catalan.
    DANISH = "danish"  #: Selects the Lucene Snowball stemming tokenizer for Danish.
    DUTCH = "dutch"  #: Selects the Lucene Snowball stemming tokenizer for Dutch.
    ENGLISH = "english"  #: Selects the Lucene Snowball stemming tokenizer for English.
    FINNISH = "finnish"  #: Selects the Lucene Snowball stemming tokenizer for Finnish.
    FRENCH = "french"  #: Selects the Lucene Snowball stemming tokenizer for French.
    GERMAN = "german"  #: Selects the Lucene Snowball stemming tokenizer for German.
    GERMAN2 = "german2"  #: Selects the Lucene Snowball stemming tokenizer that uses the German variant algorithm.
    HUNGARIAN = "hungarian"  #: Selects the Lucene Snowball stemming tokenizer for Hungarian.
    ITALIAN = "italian"  #: Selects the Lucene Snowball stemming tokenizer for Italian.
    KP = "kp"  #: Selects the Lucene Snowball stemming tokenizer for Dutch that uses the Kraaij-Pohlmann stemming algorithm.
    LOVINS = "lovins"  #: Selects the Lucene Snowball stemming tokenizer for English that uses the Lovins stemming algorithm.
    NORWEGIAN = "norwegian"  #: Selects the Lucene Snowball stemming tokenizer for Norwegian.
    PORTER = "porter"  #: Selects the Lucene Snowball stemming tokenizer for English that uses the Porter stemming algorithm.
    PORTUGUESE = "portuguese"  #: Selects the Lucene Snowball stemming tokenizer for Portuguese.
    ROMANIAN = "romanian"  #: Selects the Lucene Snowball stemming tokenizer for Romanian.
    RUSSIAN = "russian"  #: Selects the Lucene Snowball stemming tokenizer for Russian.
    SPANISH = "spanish"  #: Selects the Lucene Snowball stemming tokenizer for Spanish.
    SWEDISH = "swedish"  #: Selects the Lucene Snowball stemming tokenizer for Swedish.
    TURKISH = "turkish"  #: Selects the Lucene Snowball stemming tokenizer for Turkish.

class SplitSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by SplitSkill.
    """

    DA = "da"  #: Danish.
    DE = "de"  #: German.
    EN = "en"  #: English.
    ES = "es"  #: Spanish.
    FI = "fi"  #: Finnish.
    FR = "fr"  #: French.
    IT = "it"  #: Italian.
    KO = "ko"  #: Korean.
    PT = "pt"  #: Portuguese.

class StemmerTokenFilterLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language to use for a stemmer token filter.
    """

    ARABIC = "arabic"  #: Selects the Lucene stemming tokenizer for Arabic.
    ARMENIAN = "armenian"  #: Selects the Lucene stemming tokenizer for Armenian.
    BASQUE = "basque"  #: Selects the Lucene stemming tokenizer for Basque.
    BRAZILIAN = "brazilian"  #: Selects the Lucene stemming tokenizer for Portuguese (Brazil).
    BULGARIAN = "bulgarian"  #: Selects the Lucene stemming tokenizer for Bulgarian.
    CATALAN = "catalan"  #: Selects the Lucene stemming tokenizer for Catalan.
    CZECH = "czech"  #: Selects the Lucene stemming tokenizer for Czech.
    DANISH = "danish"  #: Selects the Lucene stemming tokenizer for Danish.
    DUTCH = "dutch"  #: Selects the Lucene stemming tokenizer for Dutch.
    DUTCH_KP = "dutchKp"  #: Selects the Lucene stemming tokenizer for Dutch that uses the Kraaij-Pohlmann stemming algorithm.
    ENGLISH = "english"  #: Selects the Lucene stemming tokenizer for English.
    LIGHT_ENGLISH = "lightEnglish"  #: Selects the Lucene stemming tokenizer for English that does light stemming.
    MINIMAL_ENGLISH = "minimalEnglish"  #: Selects the Lucene stemming tokenizer for English that does minimal stemming.
    POSSESSIVE_ENGLISH = "possessiveEnglish"  #: Selects the Lucene stemming tokenizer for English that removes trailing possessives from words.
    PORTER2 = "porter2"  #: Selects the Lucene stemming tokenizer for English that uses the Porter2 stemming algorithm.
    LOVINS = "lovins"  #: Selects the Lucene stemming tokenizer for English that uses the Lovins stemming algorithm.
    FINNISH = "finnish"  #: Selects the Lucene stemming tokenizer for Finnish.
    LIGHT_FINNISH = "lightFinnish"  #: Selects the Lucene stemming tokenizer for Finnish that does light stemming.
    FRENCH = "french"  #: Selects the Lucene stemming tokenizer for French.
    LIGHT_FRENCH = "lightFrench"  #: Selects the Lucene stemming tokenizer for French that does light stemming.
    MINIMAL_FRENCH = "minimalFrench"  #: Selects the Lucene stemming tokenizer for French that does minimal stemming.
    GALICIAN = "galician"  #: Selects the Lucene stemming tokenizer for Galician.
    MINIMAL_GALICIAN = "minimalGalician"  #: Selects the Lucene stemming tokenizer for Galician that does minimal stemming.
    GERMAN = "german"  #: Selects the Lucene stemming tokenizer for German.
    GERMAN2 = "german2"  #: Selects the Lucene stemming tokenizer that uses the German variant algorithm.
    LIGHT_GERMAN = "lightGerman"  #: Selects the Lucene stemming tokenizer for German that does light stemming.
    MINIMAL_GERMAN = "minimalGerman"  #: Selects the Lucene stemming tokenizer for German that does minimal stemming.
    GREEK = "greek"  #: Selects the Lucene stemming tokenizer for Greek.
    HINDI = "hindi"  #: Selects the Lucene stemming tokenizer for Hindi.
    HUNGARIAN = "hungarian"  #: Selects the Lucene stemming tokenizer for Hungarian.
    LIGHT_HUNGARIAN = "lightHungarian"  #: Selects the Lucene stemming tokenizer for Hungarian that does light stemming.
    INDONESIAN = "indonesian"  #: Selects the Lucene stemming tokenizer for Indonesian.
    IRISH = "irish"  #: Selects the Lucene stemming tokenizer for Irish.
    ITALIAN = "italian"  #: Selects the Lucene stemming tokenizer for Italian.
    LIGHT_ITALIAN = "lightItalian"  #: Selects the Lucene stemming tokenizer for Italian that does light stemming.
    SORANI = "sorani"  #: Selects the Lucene stemming tokenizer for Sorani.
    LATVIAN = "latvian"  #: Selects the Lucene stemming tokenizer for Latvian.
    NORWEGIAN = "norwegian"  #: Selects the Lucene stemming tokenizer for Norwegian (Bokmål).
    LIGHT_NORWEGIAN = "lightNorwegian"  #: Selects the Lucene stemming tokenizer for Norwegian (Bokmål) that does light stemming.
    MINIMAL_NORWEGIAN = "minimalNorwegian"  #: Selects the Lucene stemming tokenizer for Norwegian (Bokmål) that does minimal stemming.
    LIGHT_NYNORSK = "lightNynorsk"  #: Selects the Lucene stemming tokenizer for Norwegian (Nynorsk) that does light stemming.
    MINIMAL_NYNORSK = "minimalNynorsk"  #: Selects the Lucene stemming tokenizer for Norwegian (Nynorsk) that does minimal stemming.
    PORTUGUESE = "portuguese"  #: Selects the Lucene stemming tokenizer for Portuguese.
    LIGHT_PORTUGUESE = "lightPortuguese"  #: Selects the Lucene stemming tokenizer for Portuguese that does light stemming.
    MINIMAL_PORTUGUESE = "minimalPortuguese"  #: Selects the Lucene stemming tokenizer for Portuguese that does minimal stemming.
    PORTUGUESE_RSLP = "portugueseRslp"  #: Selects the Lucene stemming tokenizer for Portuguese that uses the RSLP stemming algorithm.
    ROMANIAN = "romanian"  #: Selects the Lucene stemming tokenizer for Romanian.
    RUSSIAN = "russian"  #: Selects the Lucene stemming tokenizer for Russian.
    LIGHT_RUSSIAN = "lightRussian"  #: Selects the Lucene stemming tokenizer for Russian that does light stemming.
    SPANISH = "spanish"  #: Selects the Lucene stemming tokenizer for Spanish.
    LIGHT_SPANISH = "lightSpanish"  #: Selects the Lucene stemming tokenizer for Spanish that does light stemming.
    SWEDISH = "swedish"  #: Selects the Lucene stemming tokenizer for Swedish.
    LIGHT_SWEDISH = "lightSwedish"  #: Selects the Lucene stemming tokenizer for Swedish that does light stemming.
    TURKISH = "turkish"  #: Selects the Lucene stemming tokenizer for Turkish.

class StopwordsList(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Identifies a predefined list of language-specific stopwords.
    """

    ARABIC = "arabic"  #: Selects the stopword list for Arabic.
    ARMENIAN = "armenian"  #: Selects the stopword list for Armenian.
    BASQUE = "basque"  #: Selects the stopword list for Basque.
    BRAZILIAN = "brazilian"  #: Selects the stopword list for Portuguese (Brazil).
    BULGARIAN = "bulgarian"  #: Selects the stopword list for Bulgarian.
    CATALAN = "catalan"  #: Selects the stopword list for Catalan.
    CZECH = "czech"  #: Selects the stopword list for Czech.
    DANISH = "danish"  #: Selects the stopword list for Danish.
    DUTCH = "dutch"  #: Selects the stopword list for Dutch.
    ENGLISH = "english"  #: Selects the stopword list for English.
    FINNISH = "finnish"  #: Selects the stopword list for Finnish.
    FRENCH = "french"  #: Selects the stopword list for French.
    GALICIAN = "galician"  #: Selects the stopword list for Galician.
    GERMAN = "german"  #: Selects the stopword list for German.
    GREEK = "greek"  #: Selects the stopword list for Greek.
    HINDI = "hindi"  #: Selects the stopword list for Hindi.
    HUNGARIAN = "hungarian"  #: Selects the stopword list for Hungarian.
    INDONESIAN = "indonesian"  #: Selects the stopword list for Indonesian.
    IRISH = "irish"  #: Selects the stopword list for Irish.
    ITALIAN = "italian"  #: Selects the stopword list for Italian.
    LATVIAN = "latvian"  #: Selects the stopword list for Latvian.
    NORWEGIAN = "norwegian"  #: Selects the stopword list for Norwegian.
    PERSIAN = "persian"  #: Selects the stopword list for Persian.
    PORTUGUESE = "portuguese"  #: Selects the stopword list for Portuguese.
    ROMANIAN = "romanian"  #: Selects the stopword list for Romanian.
    RUSSIAN = "russian"  #: Selects the stopword list for Russian.
    SORANI = "sorani"  #: Selects the stopword list for Sorani.
    SPANISH = "spanish"  #: Selects the stopword list for Spanish.
    SWEDISH = "swedish"  #: Selects the stopword list for Swedish.
    THAI = "thai"  #: Selects the stopword list for Thai.
    TURKISH = "turkish"  #: Selects the stopword list for Turkish.

class TextSplitMode(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """A value indicating which split mode to perform.
    """

    PAGES = "pages"  #: Split the text into individual pages.
    SENTENCES = "sentences"  #: Split the text into individual sentences.

class TextTranslationSkillLanguage(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The language codes supported for input text by TextTranslationSkill.
    """

    AF = "af"  #: Afrikaans.
    AR = "ar"  #: Arabic.
    BN = "bn"  #: Bangla.
    BS = "bs"  #: Bosnian (Latin).
    BG = "bg"  #: Bulgarian.
    YUE = "yue"  #: Cantonese (Traditional).
    CA = "ca"  #: Catalan.
    ZH_HANS = "zh-Hans"  #: Chinese Simplified.
    ZH_HANT = "zh-Hant"  #: Chinese Traditional.
    HR = "hr"  #: Croatian.
    CS = "cs"  #: Czech.
    DA = "da"  #: Danish.
    NL = "nl"  #: Dutch.
    EN = "en"  #: English.
    ET = "et"  #: Estonian.
    FJ = "fj"  #: Fijian.
    FIL = "fil"  #: Filipino.
    FI = "fi"  #: Finnish.
    FR = "fr"  #: French.
    DE = "de"  #: German.
    EL = "el"  #: Greek.
    HT = "ht"  #: Haitian Creole.
    HE = "he"  #: Hebrew.
    HI = "hi"  #: Hindi.
    MWW = "mww"  #: Hmong Daw.
    HU = "hu"  #: Hungarian.
    IS_ENUM = "is"  #: Icelandic.
    ID = "id"  #: Indonesian.
    IT = "it"  #: Italian.
    JA = "ja"  #: Japanese.
    SW = "sw"  #: Kiswahili.
    TLH = "tlh"  #: Klingon.
    KO = "ko"  #: Korean.
    LV = "lv"  #: Latvian.
    LT = "lt"  #: Lithuanian.
    MG = "mg"  #: Malagasy.
    MS = "ms"  #: Malay.
    MT = "mt"  #: Maltese.
    NB = "nb"  #: Norwegian.
    FA = "fa"  #: Persian.
    PL = "pl"  #: Polish.
    PT = "pt"  #: Portuguese.
    OTQ = "otq"  #: Queretaro Otomi.
    RO = "ro"  #: Romanian.
    RU = "ru"  #: Russian.
    SM = "sm"  #: Samoan.
    SR_CYRL = "sr-Cyrl"  #: Serbian (Cyrillic).
    SR_LATN = "sr-Latn"  #: Serbian (Latin).
    SK = "sk"  #: Slovak.
    SL = "sl"  #: Slovenian.
    ES = "es"  #: Spanish.
    SV = "sv"  #: Swedish.
    TY = "ty"  #: Tahitian.
    TA = "ta"  #: Tamil.
    TE = "te"  #: Telugu.
    TH = "th"  #: Thai.
    TO = "to"  #: Tongan.
    TR = "tr"  #: Turkish.
    UK = "uk"  #: Ukrainian.
    UR = "ur"  #: Urdu.
    VI = "vi"  #: Vietnamese.
    CY = "cy"  #: Welsh.
    YUA = "yua"  #: Yucatec Maya.

class TokenCharacterKind(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Represents classes of characters on which a token filter can operate.
    """

    LETTER = "letter"  #: Keeps letters in tokens.
    DIGIT = "digit"  #: Keeps digits in tokens.
    WHITESPACE = "whitespace"  #: Keeps whitespace in tokens.
    PUNCTUATION = "punctuation"  #: Keeps punctuation in tokens.
    SYMBOL = "symbol"  #: Keeps symbols in tokens.

class TokenFilterName(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """Defines the names of all token filters supported by Azure Cognitive Search.
    """

    ARABIC_NORMALIZATION = "arabic_normalization"  #: A token filter that applies the Arabic normalizer to normalize the orthography. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.html.
    APOSTROPHE = "apostrophe"  #: Strips all characters after an apostrophe (including the apostrophe itself). See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/tr/ApostropheFilter.html.
    ASCII_FOLDING = "asciifolding"  #: Converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if such equivalents exist. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html.
    CJK_BIGRAM = "cjk_bigram"  #: Forms bigrams of CJK terms that are generated from the standard tokenizer. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKBigramFilter.html.
    CJK_WIDTH = "cjk_width"  #: Normalizes CJK width differences. Folds fullwidth ASCII variants into the equivalent basic Latin, and half-width Katakana variants into the equivalent Kana. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/cjk/CJKWidthFilter.html.
    CLASSIC = "classic"  #: Removes English possessives, and dots from acronyms. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/standard/ClassicFilter.html.
    COMMON_GRAM = "common_grams"  #: Construct bigrams for frequently occurring terms while indexing. Single terms are still indexed too, with bigrams overlaid. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/commongrams/CommonGramsFilter.html.
    EDGE_N_GRAM = "edgeNGram_v2"  #: Generates n-grams of the given size(s) starting from the front or the back of an input token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.html.
    ELISION = "elision"  #: Removes elisions. For example, "l'avion" (the plane) will be converted to "avion" (plane). See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/util/ElisionFilter.html.
    GERMAN_NORMALIZATION = "german_normalization"  #: Normalizes German characters according to the heuristics of the German2 snowball algorithm. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/de/GermanNormalizationFilter.html.
    HINDI_NORMALIZATION = "hindi_normalization"  #: Normalizes text in Hindi to remove some differences in spelling variations. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/hi/HindiNormalizationFilter.html.
    INDIC_NORMALIZATION = "indic_normalization"  #: Normalizes the Unicode representation of text in Indian languages. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/in/IndicNormalizationFilter.html.
    KEYWORD_REPEAT = "keyword_repeat"  #: Emits each incoming token twice, once as keyword and once as non-keyword. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilter.html.
    K_STEM = "kstem"  #: A high-performance kstem filter for English. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/en/KStemFilter.html.
    LENGTH = "length"  #: Removes words that are too long or too short. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/LengthFilter.html.
    LIMIT = "limit"  #: Limits the number of tokens while indexing. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.html.
    LOWERCASE = "lowercase"  #: Normalizes token text to lower case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html.
    N_GRAM = "nGram_v2"  #: Generates n-grams of the given size(s). See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ngram/NGramTokenFilter.html.
    PERSIAN_NORMALIZATION = "persian_normalization"  #: Applies normalization for Persian. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/fa/PersianNormalizationFilter.html.
    PHONETIC = "phonetic"  #: Create tokens for phonetic matches. See https://lucene.apache.org/core/4_10_3/analyzers-phonetic/org/apache/lucene/analysis/phonetic/package-tree.html.
    PORTER_STEM = "porter_stem"  #: Uses the Porter stemming algorithm to transform the token stream. See http://tartarus.org/~martin/PorterStemmer.
    REVERSE = "reverse"  #: Reverses the token string. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/reverse/ReverseStringFilter.html.
    SCANDINAVIAN_NORMALIZATION = "scandinavian_normalization"  #: Normalizes use of the interchangeable Scandinavian characters. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianNormalizationFilter.html.
    SCANDINAVIAN_FOLDING_NORMALIZATION = "scandinavian_folding"  #: Folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o. It also discriminates against use of double vowels aa, ae, ao, oe and oo, leaving just the first one. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/ScandinavianFoldingFilter.html.
    SHINGLE = "shingle"  #: Creates combinations of tokens as a single token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/shingle/ShingleFilter.html.
    SNOWBALL = "snowball"  #: A filter that stems words using a Snowball-generated stemmer. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/snowball/SnowballFilter.html.
    SORANI_NORMALIZATION = "sorani_normalization"  #: Normalizes the Unicode representation of Sorani text. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/ckb/SoraniNormalizationFilter.html.
    STEMMER = "stemmer"  #: Language specific stemming filter. See https://docs.microsoft.com/rest/api/searchservice/Custom-analyzers-in-Azure-Search#TokenFilters.
    STOPWORDS = "stopwords"  #: Removes stop words from a token stream. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/StopFilter.html.
    TRIM = "trim"  #: Trims leading and trailing whitespace from tokens. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/TrimFilter.html.
    TRUNCATE = "truncate"  #: Truncates the terms to a specific length. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.html.
    UNIQUE = "unique"  #: Filters out tokens with same text as the previous token. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.html.
    UPPERCASE = "uppercase"  #: Normalizes token text to upper case. See http://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/core/UpperCaseFilter.html.
    WORD_DELIMITER = "word_delimiter"  #: Splits words into subwords and performs optional transformations on subword groups.

class VisualFeature(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
    """The strings indicating what visual feature types to return.
    """

    ADULT = "adult"  #: Visual features recognized as adult persons.
    BRANDS = "brands"  #: Visual features recognized as commercial brands.
    CATEGORIES = "categories"  #: Categories.
    DESCRIPTION = "description"  #: Description.
    FACES = "faces"  #: Visual features recognized as people faces.
    OBJECTS = "objects"  #: Visual features recognized as objects.
    TAGS = "tags"  #: Tags.