Source code for azure.search.documents.indexes._internal._models

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import msrest.serialization
from ._generated.models import (
    LexicalAnalyzer,
    LexicalTokenizer,
    AnalyzeRequest,
)


class AnalyzeTextOptions(msrest.serialization.Model):
    """Specifies some text and analysis components used to break that text into tokens.

    All required parameters must be populated in order to send to Azure.

    :param text: Required. The text to break into tokens.
    :type text: str
    :param analyzer_name: The name of the analyzer to use to break the given text. If this
     parameter is not specified, you must specify a tokenizer instead. The tokenizer and analyzer
     parameters are mutually exclusive. Possible values include: "ar.microsoft", "ar.lucene",
     "hy.lucene", "bn.microsoft", "eu.lucene", "bg.microsoft", "bg.lucene", "ca.microsoft",
     "ca.lucene", "zh-Hans.microsoft", "zh-Hans.lucene", "zh-Hant.microsoft", "zh-Hant.lucene",
     "hr.microsoft", "cs.microsoft", "cs.lucene", "da.microsoft", "da.lucene", "nl.microsoft",
     "nl.lucene", "en.microsoft", "en.lucene", "et.microsoft", "fi.microsoft", "fi.lucene",
     "fr.microsoft", "fr.lucene", "gl.lucene", "de.microsoft", "de.lucene", "el.microsoft",
     "el.lucene", "gu.microsoft", "he.microsoft", "hi.microsoft", "hi.lucene", "hu.microsoft",
     "hu.lucene", "is.microsoft", "id.microsoft", "id.lucene", "ga.lucene", "it.microsoft",
     "it.lucene", "ja.microsoft", "ja.lucene", "kn.microsoft", "ko.microsoft", "ko.lucene",
     "lv.microsoft", "lv.lucene", "lt.microsoft", "ml.microsoft", "ms.microsoft", "mr.microsoft",
     "nb.microsoft", "no.lucene", "fa.lucene", "pl.microsoft", "pl.lucene", "pt-BR.microsoft",
     "pt-BR.lucene", "pt-PT.microsoft", "pt-PT.lucene", "pa.microsoft", "ro.microsoft",
     "ro.lucene", "ru.microsoft", "ru.lucene", "sr-cyrillic.microsoft", "sr-latin.microsoft",
     "sk.microsoft", "sl.microsoft", "es.microsoft", "es.lucene", "sv.microsoft", "sv.lucene",
     "ta.microsoft", "te.microsoft", "th.microsoft", "th.lucene", "tr.microsoft", "tr.lucene",
     "uk.microsoft", "ur.microsoft", "vi.microsoft", "standard.lucene",
     "standardasciifolding.lucene", "keyword", "pattern", "simple", "stop", "whitespace".
    :type analyzer_name: str or ~azure.search.documents.indexes.models.LexicalAnalyzerName
    :param tokenizer_name: The name of the tokenizer to use to break the given text. If this
     parameter is not specified, you must specify an analyzer instead. The tokenizer and analyzer
     parameters are mutually exclusive. Possible values include: "classic", "edgeNGram",
     "keyword_v2", "letter", "lowercase", "microsoft_language_tokenizer",
     "microsoft_language_stemming_tokenizer", "nGram", "path_hierarchy_v2", "pattern",
     "standard_v2", "uax_url_email", "whitespace".
    :type tokenizer_name: str or ~azure.search.documents.indexes.models.LexicalTokenizerName
    :param token_filters: An optional list of token filters to use when breaking the given text.
     This parameter can only be set when using the tokenizer parameter.
    :type token_filters: list[str or ~azure.search.documents.indexes.models.TokenFilterName]
    :param char_filters: An optional list of character filters to use when breaking the given
     text. This parameter can only be set when using the tokenizer parameter.
    :type char_filters: list[str]
    """

    _validation = {
        'text': {'required': True},
    }

    _attribute_map = {
        'text': {'key': 'text', 'type': 'str'},
        'analyzer_name': {'key': 'analyzerName', 'type': 'str'},
        'tokenizer_name': {'key': 'tokenizerName', 'type': 'str'},
        'token_filters': {'key': 'tokenFilters', 'type': '[str]'},
        'char_filters': {'key': 'charFilters', 'type': '[str]'},
    }

    def __init__(
        self,
        **kwargs
    ):
        super(AnalyzeTextOptions, self).__init__(**kwargs)
        self.text = kwargs['text']
        self.analyzer_name = kwargs.get('analyzer_name', None)
        self.tokenizer_name = kwargs.get('tokenizer_name', None)
        self.token_filters = kwargs.get('token_filters', None)
        self.char_filters = kwargs.get('char_filters', None)

    def to_analyze_request(self):
        return AnalyzeRequest(
            text=self.text,
            analyzer=self.analyzer_name,
            tokenizer=self.tokenizer_name,
            token_filters=self.token_filters,
            char_filters=self.char_filters
        )
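
# Illustrative sketch (not part of the original module): building an AnalyzeTextOptions and
# converting it to the generated AnalyzeRequest. The sample text and the "en.lucene" analyzer
# name are hypothetical example values; "en.lucene" is one of the documented
# LexicalAnalyzerName possibilities.
def _example_analyze_text_options():
    options = AnalyzeTextOptions(
        text="The quick brown fox",  # hypothetical input text
        analyzer_name="en.lucene",
    )
    # The wrapper maps analyzer_name/tokenizer_name onto the generated model's
    # analyzer/tokenizer fields.
    return options.to_analyze_request()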

class CustomAnalyzer(LexicalAnalyzer):
    """Allows you to take control over the process of converting text into indexable/searchable
    tokens. It's a user-defined configuration consisting of a single predefined tokenizer and one
    or more filters. The tokenizer is responsible for breaking text into tokens, and the filters
    for modifying tokens emitted by the tokenizer.

    All required parameters must be populated in order to send to Azure.

    :param odata_type: Required. Identifies the concrete type of the analyzer. Constant filled by
     server.
    :type odata_type: str
    :param name: Required. The name of the analyzer. It must only contain letters, digits, spaces,
     dashes or underscores, can only start and end with alphanumeric characters, and is limited to
     128 characters.
    :type name: str
    :param tokenizer_name: Required. The name of the tokenizer to use to divide continuous text
     into a sequence of tokens, such as breaking a sentence into words. Possible values include:
     "classic", "edgeNGram", "keyword_v2", "letter", "lowercase", "microsoft_language_tokenizer",
     "microsoft_language_stemming_tokenizer", "nGram", "path_hierarchy_v2", "pattern",
     "standard_v2", "uax_url_email", "whitespace".
    :type tokenizer_name: str or ~azure.search.documents.indexes.models.LexicalTokenizerName
    :param token_filters: A list of token filters used to filter out or modify the tokens
     generated by a tokenizer. For example, you can specify a lowercase filter that converts all
     characters to lowercase. The filters are run in the order in which they are listed.
    :type token_filters: list[str or ~azure.search.documents.indexes.models.TokenFilterName]
    :param char_filters: A list of character filters used to prepare input text before it is
     processed by the tokenizer. For instance, they can replace certain characters or symbols.
     The filters are run in the order in which they are listed.
    :type char_filters: list[str]
    """

    _validation = {
        'odata_type': {'required': True},
        'name': {'required': True},
        'tokenizer_name': {'required': True},
    }

    _attribute_map = {
        'odata_type': {'key': '@odata\\.type', 'type': 'str'},
        'name': {'key': 'name', 'type': 'str'},
        'tokenizer_name': {'key': 'tokenizerName', 'type': 'str'},
        'token_filters': {'key': 'tokenFilters', 'type': '[str]'},
        'char_filters': {'key': 'charFilters', 'type': '[str]'},
    }

    def __init__(
        self,
        **kwargs
    ):
        super(CustomAnalyzer, self).__init__(**kwargs)
        self.odata_type = '#Microsoft.Azure.Search.CustomAnalyzer'
        self.tokenizer_name = kwargs['tokenizer_name']
        self.token_filters = kwargs.get('token_filters', None)
        self.char_filters = kwargs.get('char_filters', None)
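
# Illustrative sketch (not part of the original module): a CustomAnalyzer that pairs a standard
# tokenizer with lowercase and asciifolding token filters. The analyzer name is hypothetical;
# the tokenizer and filter names come from the documented LexicalTokenizerName/TokenFilterName
# values.
def _example_custom_analyzer():
    return CustomAnalyzer(
        name="my_custom_analyzer",
        tokenizer_name="standard_v2",
        token_filters=["lowercase", "asciifolding"],
    )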

class PatternAnalyzer(LexicalAnalyzer):
    """Flexibly separates text into terms via a regular expression.
    This analyzer is implemented using Apache Lucene.

    All required parameters must be populated in order to send to Azure.

    :param name: Required. The name of the analyzer. It must only contain letters, digits, spaces,
     dashes or underscores, can only start and end with alphanumeric characters, and is limited to
     128 characters.
    :type name: str
    :param lower_case_terms: A value indicating whether terms should be lower-cased. Default is
     true.
    :type lower_case_terms: bool
    :param pattern: A regular expression to match token separators. Default is an expression that
     matches one or more white space characters.
    :type pattern: str
    :param flags: List of regular expression flags. Possible values of each flag include:
     'CANON_EQ', 'CASE_INSENSITIVE', 'COMMENTS', 'DOTALL', 'LITERAL', 'MULTILINE', 'UNICODE_CASE',
     'UNIX_LINES'.
    :type flags: list[str] or list[~search_service_client.models.RegexFlags]
    :param stopwords: A list of stopwords.
    :type stopwords: list[str]
    """

    _validation = {"odata_type": {"required": True}, "name": {"required": True}}

    _attribute_map = {
        "odata_type": {"key": "@odata\\.type", "type": "str"},
        "name": {"key": "name", "type": "str"},
        "lower_case_terms": {"key": "lowercase", "type": "bool"},
        "pattern": {"key": "pattern", "type": "str"},
        "flags": {"key": "flags", "type": "[str]"},
        "stopwords": {"key": "stopwords", "type": "[str]"},
    }

    def __init__(self, **kwargs):
        super(PatternAnalyzer, self).__init__(**kwargs)
        self.odata_type = "#Microsoft.Azure.Search.PatternAnalyzer"
        self.lower_case_terms = kwargs.get("lower_case_terms", True)
        self.pattern = kwargs.get("pattern", r"\W+")
        self.flags = kwargs.get("flags", None)
        self.stopwords = kwargs.get("stopwords", None)
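
# Illustrative sketch (not part of the original module): a PatternAnalyzer that splits terms on
# commas instead of the default whitespace pattern. The analyzer name and the stopword list are
# hypothetical example values.
def _example_pattern_analyzer():
    return PatternAnalyzer(
        name="comma_separator_analyzer",
        pattern=r",",  # split on commas rather than the default r"\W+"
        lower_case_terms=True,
        stopwords=["the", "and"],
    )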

class PatternTokenizer(LexicalTokenizer):
    """Tokenizer that uses regex pattern matching to construct distinct tokens.
    This tokenizer is implemented using Apache Lucene.

    All required parameters must be populated in order to send to Azure.

    :param name: Required. The name of the tokenizer. It must only contain letters, digits,
     spaces, dashes or underscores, can only start and end with alphanumeric characters, and is
     limited to 128 characters.
    :type name: str
    :param pattern: A regular expression to match token separators. Default is an expression that
     matches one or more white space characters.
    :type pattern: str
    :param flags: List of regular expression flags. Possible values of each flag include:
     'CANON_EQ', 'CASE_INSENSITIVE', 'COMMENTS', 'DOTALL', 'LITERAL', 'MULTILINE', 'UNICODE_CASE',
     'UNIX_LINES'.
    :type flags: list[str] or list[~search_service_client.models.RegexFlags]
    :param group: The zero-based ordinal of the matching group in the regular expression to
     extract into tokens. Use -1 if you want to use the entire pattern to split the input into
     tokens, irrespective of matching groups. Default is -1.
    :type group: int
    """

    _validation = {"odata_type": {"required": True}, "name": {"required": True}}

    _attribute_map = {
        "odata_type": {"key": "@odata\\.type", "type": "str"},
        "name": {"key": "name", "type": "str"},
        "pattern": {"key": "pattern", "type": "str"},
        "flags": {"key": "flags", "type": "[str]"},
        "group": {"key": "group", "type": "int"},
    }

    def __init__(self, **kwargs):
        super(PatternTokenizer, self).__init__(**kwargs)
        self.odata_type = "#Microsoft.Azure.Search.PatternTokenizer"
        self.pattern = kwargs.get("pattern", r"\W+")
        self.flags = kwargs.get("flags", None)
        self.group = kwargs.get("group", -1)
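
# Illustrative sketch (not part of the original module): a PatternTokenizer that splits tokens on
# semicolons instead of the default whitespace pattern, leaving group at its default of -1 so the
# whole pattern acts as the separator. The tokenizer name and regex are hypothetical.
def _example_pattern_tokenizer():
    return PatternTokenizer(
        name="semicolon_tokenizer",
        pattern=r";",  # split on semicolons rather than the default r"\W+"
    )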

class SearchResourceEncryptionKey(msrest.serialization.Model):
    """A customer-managed encryption key in Azure Key Vault. Keys that you create and manage can
    be used to encrypt or decrypt data-at-rest in Azure Cognitive Search, such as indexes and
    synonym maps.

    All required parameters must be populated in order to send to Azure.

    :param key_name: Required. The name of your Azure Key Vault key to be used to encrypt your
     data at rest.
    :type key_name: str
    :param key_version: Required. The version of your Azure Key Vault key to be used to encrypt
     your data at rest.
    :type key_version: str
    :param vault_uri: Required. The URI of your Azure Key Vault, also referred to as DNS name,
     that contains the key to be used to encrypt your data at rest. An example URI might be
     https://my-keyvault-name.vault.azure.net.
    :type vault_uri: str
    :param application_id: Required. An AAD Application ID that was granted the required access
     permissions to the Azure Key Vault that is to be used when encrypting your data at rest. The
     Application ID should not be confused with the Object ID for your AAD Application.
    :type application_id: str
    :param application_secret: The authentication key of the specified AAD application.
    :type application_secret: str
    """

    _validation = {
        'key_name': {'required': True},
        'key_version': {'required': True},
        'vault_uri': {'required': True},
    }

    _attribute_map = {
        'key_name': {'key': 'keyVaultKeyName', 'type': 'str'},
        'key_version': {'key': 'keyVaultKeyVersion', 'type': 'str'},
        'vault_uri': {'key': 'keyVaultUri', 'type': 'str'},
        'application_id': {'key': 'applicationId', 'type': 'str'},
        'application_secret': {'key': 'applicationSecret', 'type': 'str'},
    }

    def __init__(
        self,
        **kwargs
    ):
        super(SearchResourceEncryptionKey, self).__init__(**kwargs)
        self.key_name = kwargs['key_name']
        self.key_version = kwargs['key_version']
        self.vault_uri = kwargs['vault_uri']
        self.application_id = kwargs.get('application_id', None)
        self.application_secret = kwargs.get('application_secret', None)
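
# Illustrative sketch (not part of the original module): describing a customer-managed key held
# in Azure Key Vault. Every value below (key name, key version, vault URI, AAD application ID) is
# a hypothetical placeholder.
def _example_encryption_key():
    return SearchResourceEncryptionKey(
        key_name="my-search-cmk",
        key_version="0123456789abcdef0123456789abcdef",
        vault_uri="https://my-keyvault-name.vault.azure.net",
        # application_id/application_secret identify the AAD application that is allowed to
        # access the Key Vault; application_secret is omitted here.
        application_id="00000000-0000-0000-0000-000000000000",
    )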

class SynonymMap(msrest.serialization.Model):
    """Represents a synonym map definition.

    Variables are only populated by the server, and will be ignored when sending a request.

    All required parameters must be populated in order to send to Azure.

    :param name: Required. The name of the synonym map.
    :type name: str
    :ivar format: Required. The format of the synonym map. Only the 'solr' format is currently
     supported. Default value: "solr".
    :vartype format: str
    :param synonyms: Required. A series of synonym rules in the specified synonym map format. The
     rules must be separated by newlines.
    :type synonyms: list[str]
    :param encryption_key: A description of an encryption key that you create in Azure Key Vault.
     This key is used to provide an additional level of encryption-at-rest for your data when you
     want full assurance that no one, not even Microsoft, can decrypt your data in Azure Cognitive
     Search. Once you have encrypted your data, it will always remain encrypted. Azure Cognitive
     Search will ignore attempts to set this property to null. You can change this property as
     needed if you want to rotate your encryption key; your data will be unaffected. Encryption
     with customer-managed keys is not available for free search services, and is only available
     for paid services created on or after January 1, 2019.
    :type encryption_key: ~azure.search.documents.models.SearchResourceEncryptionKey
    :param e_tag: The ETag of the synonym map.
    :type e_tag: str
    """

    _validation = {
        'name': {'required': True},
        'format': {'required': True, 'constant': True},
        'synonyms': {'required': True},
    }

    _attribute_map = {
        'name': {'key': 'name', 'type': 'str'},
        'format': {'key': 'format', 'type': 'str'},
        'synonyms': {'key': 'synonyms', 'type': '[str]'},
        'encryption_key': {'key': 'encryptionKey', 'type': 'SearchResourceEncryptionKey'},
        'e_tag': {'key': '@odata\\.etag', 'type': 'str'},
    }

    format = "solr"

    def __init__(
        self,
        **kwargs
    ):
        super(SynonymMap, self).__init__(**kwargs)
        self.name = kwargs['name']
        self.synonyms = kwargs['synonyms']
        self.encryption_key = kwargs.get('encryption_key', None)
        self.e_tag = kwargs.get('e_tag', None)
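
# Illustrative sketch (not part of the original module): a SynonymMap with two solr-format rules,
# passed as a list of rule strings to match the '[str]' attribute mapping above. The map name and
# the rules are hypothetical example values.
def _example_synonym_map():
    return SynonymMap(
        name="hotel-synonyms",
        synonyms=[
            "USA, United States, United States of America",
            "Washington, Wash. => WA",
        ],
    )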

class SearchIndexerDataSourceConnection(msrest.serialization.Model):
    """Represents a datasource connection definition, which can be used to configure an indexer.

    All required parameters must be populated in order to send to Azure.

    :param name: Required. The name of the datasource connection.
    :type name: str
    :param description: The description of the datasource connection.
    :type description: str
    :param type: Required. The type of the datasource connection. Possible values include:
     "azuresql", "cosmosdb", "azureblob", "azuretable", "mysql".
    :type type: str or ~azure.search.documents.models.SearchIndexerDataSourceType
    :param connection_string: Required. The connection string for the datasource connection.
    :type connection_string: str
    :param container: Required. The data container for the datasource connection.
    :type container: ~azure.search.documents.models.SearchIndexerDataContainer
    :param data_change_detection_policy: The data change detection policy for the datasource
     connection.
    :type data_change_detection_policy: ~azure.search.documents.models.DataChangeDetectionPolicy
    :param data_deletion_detection_policy: The data deletion detection policy for the datasource
     connection.
    :type data_deletion_detection_policy:
     ~azure.search.documents.models.DataDeletionDetectionPolicy
    :param e_tag: The ETag of the data source.
    :type e_tag: str
    """

    _validation = {
        'name': {'required': True},
        'type': {'required': True},
        'connection_string': {'required': True},
        'container': {'required': True},
    }

    _attribute_map = {
        'name': {'key': 'name', 'type': 'str'},
        'description': {'key': 'description', 'type': 'str'},
        'type': {'key': 'type', 'type': 'str'},
        'connection_string': {'key': 'connectionString', 'type': 'str'},
        'container': {'key': 'container', 'type': 'SearchIndexerDataContainer'},
        'data_change_detection_policy': {'key': 'dataChangeDetectionPolicy', 'type': 'DataChangeDetectionPolicy'},
        'data_deletion_detection_policy': {'key': 'dataDeletionDetectionPolicy', 'type': 'DataDeletionDetectionPolicy'},
        'e_tag': {'key': '@odata\\.etag', 'type': 'str'},
    }

    def __init__(
        self,
        **kwargs
    ):
        super(SearchIndexerDataSourceConnection, self).__init__(**kwargs)
        self.name = kwargs['name']
        self.description = kwargs.get('description', None)
        self.type = kwargs['type']
        self.connection_string = kwargs['connection_string']
        self.container = kwargs['container']
        self.data_change_detection_policy = kwargs.get('data_change_detection_policy', None)
        self.data_deletion_detection_policy = kwargs.get('data_deletion_detection_policy', None)
        self.e_tag = kwargs.get('e_tag', None)
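
# Illustrative sketch (not part of the original module): a blob datasource connection. The
# datasource name, connection string, and container name are hypothetical placeholders, and the
# local import assumes the generated models in this package expose SearchIndexerDataContainer.
def _example_data_source_connection():
    from ._generated.models import SearchIndexerDataContainer  # assumed generated model

    return SearchIndexerDataSourceConnection(
        name="hotel-blobs",
        type="azureblob",
        connection_string="DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>",
        container=SearchIndexerDataContainer(name="hotel-data"),
    )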