azure.ai.formrecognizer.aio package¶

Raises

New in version 2023-07-31: The features keyword argument.

Example:

Analyze an invoice. For more samples see the samples folder.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentAnalysisClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)
async with document_analysis_client:
    with open(path_to_sample_documents, "rb") as f:
        poller = await document_analysis_client.begin_analyze_document(
            "prebuilt-invoice", document=f, locale="en-US"
        )
    invoices = await poller.result()

for idx, invoice in enumerate(invoices.documents):
    print(f"--------Analyzing invoice #{idx + 1}--------")
    vendor_name = invoice.fields.get("VendorName")
    if vendor_name:
        print(
            f"Vendor Name: {vendor_name.value} has confidence: {vendor_name.confidence}"
        )
    vendor_address = invoice.fields.get("VendorAddress")
    if vendor_address:
        print(
            f"Vendor Address: {vendor_address.value} has confidence: {vendor_address.confidence}"
        )
    vendor_address_recipient = invoice.fields.get("VendorAddressRecipient")
    if vendor_address_recipient:
        print(
            f"Vendor Address Recipient: {vendor_address_recipient.value} has confidence: {vendor_address_recipient.confidence}"
        )
    customer_name = invoice.fields.get("CustomerName")
    if customer_name:
        print(
            f"Customer Name: {customer_name.value} has confidence: {customer_name.confidence}"
        )
    customer_id = invoice.fields.get("CustomerId")
    if customer_id:
        print(
            f"Customer Id: {customer_id.value} has confidence: {customer_id.confidence}"
        )
    customer_address = invoice.fields.get("CustomerAddress")
    if customer_address:
        print(
            f"Customer Address: {customer_address.value} has confidence: {customer_address.confidence}"
        )
    customer_address_recipient = invoice.fields.get("CustomerAddressRecipient")
    if customer_address_recipient:
        print(
            f"Customer Address Recipient: {customer_address_recipient.value} has confidence: {customer_address_recipient.confidence}"
        )
    invoice_id = invoice.fields.get("InvoiceId")
    if invoice_id:
        print(
            f"Invoice Id: {invoice_id.value} has confidence: {invoice_id.confidence}"
        )
    invoice_date = invoice.fields.get("InvoiceDate")
    if invoice_date:
        print(
            f"Invoice Date: {invoice_date.value} has confidence: {invoice_date.confidence}"
        )
    invoice_total = invoice.fields.get("InvoiceTotal")
    if invoice_total:
        print(
            f"Invoice Total: {invoice_total.value} has confidence: {invoice_total.confidence}"
        )
    due_date = invoice.fields.get("DueDate")
    if due_date:
        print(f"Due Date: {due_date.value} has confidence: {due_date.confidence}")
    purchase_order = invoice.fields.get("PurchaseOrder")
    if purchase_order:
        print(
            f"Purchase Order: {purchase_order.value} has confidence: {purchase_order.confidence}"
        )
    billing_address = invoice.fields.get("BillingAddress")
    if billing_address:
        print(
            f"Billing Address: {billing_address.value} has confidence: {billing_address.confidence}"
        )
    billing_address_recipient = invoice.fields.get("BillingAddressRecipient")
    if billing_address_recipient:
        print(
            f"Billing Address Recipient: {billing_address_recipient.value} has confidence: {billing_address_recipient.confidence}"
        )
    shipping_address = invoice.fields.get("ShippingAddress")
    if shipping_address:
        print(
            f"Shipping Address: {shipping_address.value} has confidence: {shipping_address.confidence}"
        )
    shipping_address_recipient = invoice.fields.get("ShippingAddressRecipient")
    if shipping_address_recipient:
        print(
            f"Shipping Address Recipient: {shipping_address_recipient.value} has confidence: {shipping_address_recipient.confidence}"
        )
    print("Invoice items:")
    for idx, item in enumerate(invoice.fields.get("Items").value):
        print(f"...Item #{idx + 1}")
        item_description = item.value.get("Description")
        if item_description:
            print(
                f"......Description: {item_description.value} has confidence: {item_description.confidence}"
            )
        item_quantity = item.value.get("Quantity")
        if item_quantity:
            print(
                f"......Quantity: {item_quantity.value} has confidence: {item_quantity.confidence}"
            )
        unit = item.value.get("Unit")
        if unit:
            print(f"......Unit: {unit.value} has confidence: {unit.confidence}")
        unit_price = item.value.get("UnitPrice")
        if unit_price:
            unit_price_code = unit_price.value.code if unit_price.value.code else ""
            print(
                f"......Unit Price: {unit_price.value}{unit_price_code} has confidence: {unit_price.confidence}"
            )
        product_code = item.value.get("ProductCode")
        if product_code:
            print(
                f"......Product Code: {product_code.value} has confidence: {product_code.confidence}"
            )
        item_date = item.value.get("Date")
        if item_date:
            print(
                f"......Date: {item_date.value} has confidence: {item_date.confidence}"
            )
        tax = item.value.get("Tax")
        if tax:
            print(f"......Tax: {tax.value} has confidence: {tax.confidence}")
        amount = item.value.get("Amount")
        if amount:
            print(
                f"......Amount: {amount.value} has confidence: {amount.confidence}"
            )
    subtotal = invoice.fields.get("SubTotal")
    if subtotal:
        print(f"Subtotal: {subtotal.value} has confidence: {subtotal.confidence}")
    total_tax = invoice.fields.get("TotalTax")
    if total_tax:
        print(
            f"Total Tax: {total_tax.value} has confidence: {total_tax.confidence}"
        )
    previous_unpaid_balance = invoice.fields.get("PreviousUnpaidBalance")
    if previous_unpaid_balance:
        print(
            f"Previous Unpaid Balance: {previous_unpaid_balance.value} has confidence: {previous_unpaid_balance.confidence}"
        )
    amount_due = invoice.fields.get("AmountDue")
    if amount_due:
        print(
            f"Amount Due: {amount_due.value} has confidence: {amount_due.confidence}"
        )
    service_start_date = invoice.fields.get("ServiceStartDate")
    if service_start_date:
        print(
            f"Service Start Date: {service_start_date.value} has confidence: {service_start_date.confidence}"
        )
    service_end_date = invoice.fields.get("ServiceEndDate")
    if service_end_date:
        print(
            f"Service End Date: {service_end_date.value} has confidence: {service_end_date.confidence}"
        )
    service_address = invoice.fields.get("ServiceAddress")
    if service_address:
        print(
            f"Service Address: {service_address.value} has confidence: {service_address.confidence}"
        )
    service_address_recipient = invoice.fields.get("ServiceAddressRecipient")
    if service_address_recipient:
        print(
            f"Service Address Recipient: {service_address_recipient.value} has confidence: {service_address_recipient.confidence}"
        )
    remittance_address = invoice.fields.get("RemittanceAddress")
    if remittance_address:
        print(
            f"Remittance Address: {remittance_address.value} has confidence: {remittance_address.confidence}"
        )
    remittance_address_recipient = invoice.fields.get("RemittanceAddressRecipient")
    if remittance_address_recipient:
        print(
            f"Remittance Address Recipient: {remittance_address_recipient.value} has confidence: {remittance_address_recipient.confidence}"
        )

Analyze a custom document. For more samples see the samples folder.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentAnalysisClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
model_id = os.getenv("CUSTOM_BUILT_MODEL_ID", custom_model_id)

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

async with document_analysis_client:
    # Make sure your document's type is included in the list of document types the custom model can analyze
    with open(path_to_sample_documents, "rb") as f:
        poller = await document_analysis_client.begin_analyze_document(
            model_id=model_id, document=f
        )
    result = await poller.result()

for idx, document in enumerate(result.documents):
    print(f"--------Analyzing document #{idx + 1}--------")
    print(f"Document has type {document.doc_type}")
    print(f"Document has document type confidence {document.confidence}")
    print(f"Document was analyzed with model with ID {result.model_id}")
    for name, field in document.fields.items():
        field_value = field.value if field.value else field.content
        print(
            f"......found field of type '{field.value_type}' with value '{field_value}' and with confidence {field.confidence}"
        )

# iterate over tables, lines, and selection marks on each page
for page in result.pages:
    print(f"\nLines found on page {page.page_number}")
    for line in page.lines:
        print(f"...Line '{line.content}'")
    for word in page.words:
        print(f"...Word '{word.content}' has a confidence of {word.confidence}")
    if page.selection_marks:
        print(f"\nSelection marks found on page {page.page_number}")
        for selection_mark in page.selection_marks:
            print(
                f"...Selection mark is '{selection_mark.state}' and has a confidence of {selection_mark.confidence}"
            )

for i, table in enumerate(result.tables):
    print(f"\nTable {i + 1} can be found on page:")
    for region in table.bounding_regions:
        print(f"...{region.page_number}")
    for cell in table.cells:
        print(
            f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'"
        )
print("-----------------------------------")

async begin_analyze_document_from_url(model_id: str, document_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[azure.ai.formrecognizer._models.AnalyzeResult][source]¶

Analyze field text and semantic values from a given document. The input must be the location (URL) of the document to be analyzed.

Parameters

model_id (str) – A unique model identifier can be passed in as a string. Use this to specify the custom model ID or prebuilt model ID. Prebuilt model IDs supported can be found here: https://aka.ms/azsdk/formrecognizer/models
document_url (str) – The URL of the document to analyze. The input must be a valid, properly encoded (i.e. encode special characters, such as empty spaces), and publicly accessible URL. For service supported file types, see: https://aka.ms/azsdk/formrecognizer/supportedfiles.

Keyword Arguments

pages (str) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages="1-3, 5-6". Separate each page number or range with a comma.
locale (str) – Locale hint of the input document. See supported locales here: https://aka.ms/azsdk/formrecognizer/supportedlocales.
features (list[str]) – Document analysis features to enable.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return an AnalyzeResult.

Return type

AsyncLROPoller[AnalyzeResult]

Raises

HttpResponseError

New in version 2023-07-31: The features keyword argument.

Example:

Analyze a receipt. For more samples see the samples folder.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentAnalysisClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)
async with document_analysis_client:
    url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
    poller = await document_analysis_client.begin_analyze_document_from_url(
        "prebuilt-receipt", document_url=url
    )
    receipts = await poller.result()

for idx, receipt in enumerate(receipts.documents):
    print(f"--------Analysis of receipt #{idx + 1}--------")
    print(f"Receipt type: {receipt.doc_type if receipt.doc_type else 'N/A'}")
    merchant_name = receipt.fields.get("MerchantName")
    if merchant_name:
        print(
            f"Merchant Name: {merchant_name.value} has confidence: "
            f"{merchant_name.confidence}"
        )
    transaction_date = receipt.fields.get("TransactionDate")
    if transaction_date:
        print(
            f"Transaction Date: {transaction_date.value} has confidence: "
            f"{transaction_date.confidence}"
        )
    if receipt.fields.get("Items"):
        print("Receipt items:")
        for idx, item in enumerate(receipt.fields.get("Items").value):
            print(f"...Item #{idx + 1}")
            item_description = item.value.get("Description")
            if item_description:
                print(
                    f"......Item Description: {item_description.value} has confidence: "
                    f"{item_description.confidence}"
                )
            item_quantity = item.value.get("Quantity")
            if item_quantity:
                print(
                    f"......Item Quantity: {item_quantity.value} has confidence: "
                    f"{item_quantity.confidence}"
                )
            item_price = item.value.get("Price")
            if item_price:
                print(
                    f"......Individual Item Price: {item_price.value} has confidence: "
                    f"{item_price.confidence}"
                )
            item_total_price = item.value.get("TotalPrice")
            if item_total_price:
                print(
                    f"......Total Item Price: {item_total_price.value} has confidence: "
                    f"{item_total_price.confidence}"
                )
    subtotal = receipt.fields.get("Subtotal")
    if subtotal:
        print(f"Subtotal: {subtotal.value} has confidence: {subtotal.confidence}")
    tax = receipt.fields.get("TotalTax")
    if tax:
        print(f"Total tax: {tax.value} has confidence: {tax.confidence}")
    tip = receipt.fields.get("Tip")
    if tip:
        print(f"Tip: {tip.value} has confidence: {tip.confidence}")
    total = receipt.fields.get("Total")
    if total:
        print(f"Total: {total.value} has confidence: {total.confidence}")
    print("--------------------------------------")

async begin_classify_document(classifier_id: str, document: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[azure.ai.formrecognizer._models.AnalyzeResult][source]¶

Classify a document using a document classifier. For more information on how to build a custom classifier model, see https://aka.ms/azsdk/formrecognizer/buildclassifiermodel.

Parameters

classifier_id (str) – A unique document classifier identifier can be passed in as a string.
document (bytes or IO[bytes]) – File stream or bytes. For service supported file types, see: https://aka.ms/azsdk/formrecognizer/supportedfiles.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return an AnalyzeResult.

Return type

AsyncLROPoller[AnalyzeResult]

Raises

HttpResponseError

New in version 2023-07-31: The begin_classify_document client method.

Example:

Classify a document. For more samples see the samples folder.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentAnalysisClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
classifier_id = os.getenv("CLASSIFIER_ID", classifier_id)

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)
async with document_analysis_client:
    with open(path_to_sample_documents, "rb") as f:
        poller = await document_analysis_client.begin_classify_document(
            classifier_id, document=f
        )
    result = await poller.result()

print("----Classified documents----")
for doc in result.documents:
    print(
        f"Found document of type '{doc.doc_type or 'N/A'}' with a confidence of {doc.confidence} contained on "
        f"the following pages: {[region.page_number for region in doc.bounding_regions]}"
    )

async begin_classify_document_from_url(classifier_id: str, document_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[azure.ai.formrecognizer._models.AnalyzeResult][source]¶

Classify a given document with a document classifier. For more information on how to build a custom classifier model, see https://aka.ms/azsdk/formrecognizer/buildclassifiermodel. The input must be the location (URL) of the document to be classified.

Parameters

classifier_id (str) – A unique document classifier identifier can be passed in as a string.
document_url (str) – The URL of the document to classify. The input must be a valid, properly encoded (i.e. encode special characters, such as empty spaces), and publicly accessible URL of one of the supported formats: https://aka.ms/azsdk/formrecognizer/supportedfiles.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return an AnalyzeResult.

Return type

AsyncLROPoller[AnalyzeResult]

Raises

HttpResponseError

New in version 2023-07-31: The begin_classify_document_from_url client method.

Example:

Classify a document. For more samples see the samples folder.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentAnalysisClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
classifier_id = os.getenv("CLASSIFIER_ID", classifier_id)

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)
async with document_analysis_client:
    url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/forms/IRS-1040.pdf"

    poller = await document_analysis_client.begin_classify_document_from_url(
        classifier_id, document_url=url
    )
    result = await poller.result()

print("----Classified documents----")
for doc in result.documents:
    print(
        f"Found document of type '{doc.doc_type or 'N/A'}' with a confidence of {doc.confidence} contained on "
        f"the following pages: {[region.page_number for region in doc.bounding_regions]}"
    )

async close() → None [source]¶: Close the DocumentAnalysisClient session.

class azure.ai.formrecognizer.aio.DocumentModelAdministrationClient(endpoint: str, credential: Union[azure.core.credentials.AzureKeyCredential, azure.core.credentials_async.AsyncTokenCredential], **kwargs: Any)[source]¶

DocumentModelAdministrationClient is the Form Recognizer interface to use for building and managing models.

It provides methods for building models and classifiers, as well as methods for viewing and deleting models and classifiers, viewing model and classifier operations, accessing account information, copying models to another Form Recognizer resource, and composing a new model from a collection of existing models.

Note

DocumentModelAdministrationClient should be used with API versions 2022-08-31 and up. To use API versions <=v2.1, instantiate a FormTrainingClient.

Parameters

endpoint (str) – Supported Cognitive Services endpoints (protocol and hostname, for example: https://westus2.api.cognitive.microsoft.com).
credential (AzureKeyCredential or TokenCredential) – Credentials needed for the client to connect to Azure. This is an instance of AzureKeyCredential if using an API key or a token credential from azure.identity.

Keyword Arguments

api_version (str or DocumentAnalysisApiVersion) – The API version of the service to use for requests. It defaults to the latest service version. Setting to an older version may result in reduced feature compatibility. To use API versions <=v2.1, instantiate a FormTrainingClient.

New in version 2022-08-31: The DocumentModelAdministrationClient and its client methods.

Example:

Creating the DocumentModelAdministrationClient with an endpoint and API key.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient

# Read the resource endpoint and API key from the environment, then build
# the administration client from them.
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]

document_model_admin_client = DocumentModelAdministrationClient(endpoint, AzureKeyCredential(key))

Creating the DocumentModelAdministrationClient with a token credential.¶

"""DefaultAzureCredential will use the values from these environment
variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
"""
from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient
from azure.identity.aio import DefaultAzureCredential

# Build the administration client from the resource endpoint and a
# DefaultAzureCredential instead of an API key.
credential = DefaultAzureCredential()
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]

document_model_admin_client = DocumentModelAdministrationClient(endpoint, credential)

async begin_build_document_classifier(doc_types: Mapping[str, azure.ai.formrecognizer._models.ClassifierDocumentTypeDetails], *, classifier_id: Optional[str] = None, description: Optional[str] = None, **kwargs: Any) → azure.ai.formrecognizer.aio._async_polling.AsyncDocumentModelAdministrationLROPoller[azure.ai.formrecognizer._models.DocumentClassifierDetails][source]¶

Build a document classifier. For more information on how to build and train a custom classifier model, see https://aka.ms/azsdk/formrecognizer/buildclassifiermodel.

Parameters

doc_types (Mapping[str, ClassifierDocumentTypeDetails]) – Mapping of document types to classify against.

Keyword Arguments

classifier_id (str) – Unique document classifier name. If not specified, a classifier ID will be created for you.
description (str) – Document classifier description.

Returns

An instance of an AsyncDocumentModelAdministrationLROPoller. Call result() on the poller object to return a DocumentClassifierDetails.

Return type

AsyncDocumentModelAdministrationLROPoller[DocumentClassifierDetails]

Raises

HttpResponseError

New in version 2023-07-31: The begin_build_document_classifier client method.

Example:

Build a document classifier.¶

import os
from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient
from azure.ai.formrecognizer import (
    ClassifierDocumentTypeDetails,
    BlobSource,
    BlobFileListSource,
)
from azure.core.credentials import AzureKeyCredential

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
container_sas_url = os.environ["CLASSIFIER_CONTAINER_SAS_URL"]

document_model_admin_client = DocumentModelAdministrationClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

async with document_model_admin_client:
    poller = await document_model_admin_client.begin_build_document_classifier(
        doc_types={
            "IRS-1040-A": ClassifierDocumentTypeDetails(
                source=BlobSource(
                    container_url=container_sas_url, prefix="IRS-1040-A/train"
                )
            ),
            "IRS-1040-D": ClassifierDocumentTypeDetails(
                source=BlobFileListSource(
                    container_url=container_sas_url, file_list="IRS-1040-D.jsonl"
                )
            ),
        },
        description="IRS document classifier",
    )
    result = await poller.result()
    print(f"Classifier ID: {result.classifier_id}")
    print(f"API version used to build the classifier model: {result.api_version}")
    print(f"Classifier description: {result.description}")
    print(f"Document classes used for training the model:")
    for doc_type, details in result.doc_types.items():
        print(f"Document type: {doc_type}")
        print(f"Container source: {details.source.container_url}\n")

async begin_build_document_model(build_mode: Union[str, ModelBuildMode], *, blob_container_url: str, prefix: Optional[str] = None, model_id: Optional[str] = None, description: Optional[str] = None, tags: Optional[Mapping[str, str]] = None, **kwargs: Any) → AsyncDocumentModelAdministrationLROPoller[DocumentModelDetails][source]¶

async begin_build_document_model(build_mode: Union[str, ModelBuildMode], *, blob_container_url: str, file_list: str, model_id: Optional[str] = None, description: Optional[str] = None, tags: Optional[Mapping[str, str]] = None, **kwargs: Any) → AsyncDocumentModelAdministrationLROPoller[DocumentModelDetails]

Build a custom document model.

The request must include a blob_container_url keyword parameter that is an externally accessible Azure storage blob container URI (preferably a Shared Access Signature URI). Note that a container URI (without SAS) is accepted only when the container is public or has a managed identity configured, see more about configuring managed identities to work with Form Recognizer here: https://docs.microsoft.com/azure/applied-ai-services/form-recognizer/managed-identities. Models are built using documents that are of the following content types: ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’, ‘image/bmp’, or ‘image/heif’. Other types of content in the container are ignored.

Parameters

build_mode (str or ModelBuildMode) – The custom model build mode. Possible values include: “template”, “neural”. For more information about build modes, see: https://aka.ms/azsdk/formrecognizer/buildmode.

Keyword Arguments

blob_container_url (str) – An Azure Storage blob container’s SAS URI. A container URI (without SAS) can be used if the container is public or has a managed identity configured. For more information on setting up a training data set, see: https://aka.ms/azsdk/formrecognizer/buildtrainingset.
model_id (str) – A unique ID for your model. If not specified, a model ID will be created for you.
description (str) – An optional description to add to the model.
prefix (str) – A case-sensitive prefix string to filter documents in the blob container url path. For example, when using an Azure storage blob URI, use the prefix to restrict sub folders. prefix should end in ‘/’ to avoid cases where filenames share the same prefix.
file_list (str) – Path to a JSONL file within the container specifying a subset of documents for training.
tags (dict[str, str]) – List of user defined key-value tag attributes associated with the model.

Returns

An instance of an AsyncDocumentModelAdministrationLROPoller. Call result() on the poller object to return a DocumentModelDetails.

Return type

AsyncDocumentModelAdministrationLROPoller[DocumentModelDetails]

Raises

HttpResponseError

New in version 2023-07-31: The file_list keyword argument.

Example:

Building a model from training files.¶

from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient
from azure.ai.formrecognizer import ModelBuildMode
from azure.core.credentials import AzureKeyCredential

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
container_sas_url = os.environ["CONTAINER_SAS_URL"]

document_model_admin_client = DocumentModelAdministrationClient(
    endpoint, AzureKeyCredential(key)
)
async with document_model_admin_client:
    poller = await document_model_admin_client.begin_build_document_model(
        ModelBuildMode.TEMPLATE,
        blob_container_url=container_sas_url,
        description="my model description",
    )
    model = await poller.result()

print(f"Model ID: {model.model_id}")
print(f"Description: {model.description}")
print(f"Model created on: {model.created_on}")
print(f"Model expires on: {model.expires_on}")
print("Doc types the model can recognize:")
for name, doc_type in model.doc_types.items():
    print(
        f"Doc Type: '{name}' built with '{doc_type.build_mode}' mode which has the following fields:"
    )
    for field_name, field in doc_type.field_schema.items():
        print(
            f"Field: '{field_name}' has type '{field['type']}' and confidence score "
            f"{doc_type.field_confidence[field_name]}"
        )

async begin_compose_document_model(component_model_ids: List[str], **kwargs: Any) → azure.ai.formrecognizer.aio._async_polling.AsyncDocumentModelAdministrationLROPoller[azure.ai.formrecognizer._models.DocumentModelDetails][source]¶

Creates a composed document model from a collection of existing models.

A composed model allows multiple models to be called with a single model ID. When a document is submitted to be analyzed with a composed model ID, a classification step is first performed to route it to the correct custom model.

Parameters

component_model_ids (list[str]) – List of model IDs to use in the composed model.

Keyword Arguments

model_id (str) – A unique ID for your composed model. If not specified, a model ID will be created for you.
description (str) – An optional description to add to the model.
tags (dict[str, str]) – List of user defined key-value tag attributes associated with the model.

Returns

An instance of an AsyncDocumentModelAdministrationLROPoller. Call result() on the poller object to return a DocumentModelDetails.

Return type

AsyncDocumentModelAdministrationLROPoller[DocumentModelDetails]

Raises

HttpResponseError

Example:

Creating a composed model with existing models.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient
from azure.ai.formrecognizer import ModelBuildMode

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
po_supplies = os.environ["PURCHASE_ORDER_OFFICE_SUPPLIES_SAS_URL"]
po_equipment = os.environ["PURCHASE_ORDER_OFFICE_EQUIPMENT_SAS_URL"]
po_furniture = os.environ["PURCHASE_ORDER_OFFICE_FURNITURE_SAS_URL"]
po_cleaning_supplies = os.environ["PURCHASE_ORDER_OFFICE_CLEANING_SUPPLIES_SAS_URL"]

document_model_admin_client = DocumentModelAdministrationClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)
async with document_model_admin_client:
    supplies_poller = await document_model_admin_client.begin_build_document_model(
        ModelBuildMode.TEMPLATE,
        blob_container_url=po_supplies,
        description="Purchase order-Office supplies",
    )
    equipment_poller = await document_model_admin_client.begin_build_document_model(
        ModelBuildMode.TEMPLATE,
        blob_container_url=po_equipment,
        description="Purchase order-Office Equipment",
    )
    furniture_poller = await document_model_admin_client.begin_build_document_model(
        ModelBuildMode.TEMPLATE,
        blob_container_url=po_furniture,
        description="Purchase order-Furniture",
    )
    cleaning_supplies_poller = (
        await document_model_admin_client.begin_build_document_model(
            ModelBuildMode.TEMPLATE,
            blob_container_url=po_cleaning_supplies,
            description="Purchase order-Cleaning Supplies",
        )
    )
    supplies_model = await supplies_poller.result()
    equipment_model = await equipment_poller.result()
    furniture_model = await furniture_poller.result()
    cleaning_supplies_model = await cleaning_supplies_poller.result()

    purchase_order_models = [
        supplies_model.model_id,
        equipment_model.model_id,
        furniture_model.model_id,
        cleaning_supplies_model.model_id,
    ]

    poller = await document_model_admin_client.begin_compose_document_model(
        purchase_order_models, description="Office Supplies Composed Model"
    )
    model = await poller.result()

print("Office Supplies Composed Model Info:")
print(f"Model ID: {model.model_id}")
print(f"Description: {model.description}")
print(f"Model created on: {model.created_on}")
print(f"Model expires on: {model.expires_on}")
print("Doc types the model can recognize:")
for name, doc_type in model.doc_types.items():
    print(f"Doc Type: '{name}' which has the following fields:")
    for field_name, field in doc_type.field_schema.items():
        print(
            f"Field: '{field_name}' has type '{field['type']}' and confidence score "
            f"{doc_type.field_confidence[field_name]}"
        )

async begin_copy_document_model_to(model_id: str, target: TargetAuthorization, **kwargs: Any) → azure.ai.formrecognizer.aio._async_polling.AsyncDocumentModelAdministrationLROPoller[azure.ai.formrecognizer._models.DocumentModelDetails][source]¶

Copy a document model stored in this resource (the source) to the user specified target Form Recognizer resource.

This should be called with the source Form Recognizer resource (with the model that is intended to be copied). The target parameter should be supplied from the target resource’s output from calling the get_copy_authorization() method.

Parameters

model_id (str) – Model identifier of the model to copy to target resource.
target (TargetAuthorization) – The copy authorization generated from the target resource’s call to get_copy_authorization().

Returns

An instance of an AsyncDocumentModelAdministrationLROPoller. Call result() on the poller object to return a DocumentModelDetails.

Return type

AsyncDocumentModelAdministrationLROPoller[DocumentModelDetails]

Raises

HttpResponseError

Example:

Copy a model from the source resource to the target resource¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient

source_endpoint = os.environ["AZURE_FORM_RECOGNIZER_SOURCE_ENDPOINT"]
source_key = os.environ["AZURE_FORM_RECOGNIZER_SOURCE_KEY"]
target_endpoint = os.environ["AZURE_FORM_RECOGNIZER_TARGET_ENDPOINT"]
target_key = os.environ["AZURE_FORM_RECOGNIZER_TARGET_KEY"]
source_model_id = os.getenv("AZURE_SOURCE_MODEL_ID", custom_model_id)

target_client = DocumentModelAdministrationClient(
    endpoint=target_endpoint, credential=AzureKeyCredential(target_key)
)
async with target_client:
    target = await target_client.get_copy_authorization(
        description="model copied from other resource"
    )

source_client = DocumentModelAdministrationClient(
    endpoint=source_endpoint, credential=AzureKeyCredential(source_key)
)
async with source_client:
    poller = await source_client.begin_copy_document_model_to(
        model_id=source_model_id,
        target=target,  # output from target client's call to get_copy_authorization()
    )
    copied_over_model = await poller.result()

print(f"Model ID: {copied_over_model.model_id}")
print(f"Description: {copied_over_model.description}")
print(f"Model created on: {copied_over_model.created_on}")
print(f"Model expires on: {copied_over_model.expires_on}")
print("Doc types the model can recognize:")
for name, doc_type in copied_over_model.doc_types.items():
    print(f"Doc Type: '{name}' which has the following fields:")
    for field_name, field in doc_type.field_schema.items():
        print(
            f"Field: '{field_name}' has type '{field['type']}' and confidence score "
            f"{doc_type.field_confidence[field_name]}"
        )

async close() → None [source]¶: Close the DocumentModelAdministrationClient session.

async delete_document_classifier(classifier_id: str, **kwargs: Any) → None [source]¶

Delete a document classifier.

Parameters: classifier_id (str) – Classifier identifier.
Returns: None
Return type: None
Raises: HttpResponseError or ResourceNotFoundError –

New in version 2023-07-31: The delete_document_classifier client method.

Example:

Delete a classifier.¶

await document_model_admin_client.delete_document_classifier(
    classifier_id=my_classifier.classifier_id
)

try:
    await document_model_admin_client.get_document_classifier(
        classifier_id=my_classifier.classifier_id
    )
except ResourceNotFoundError:
    print(
        f"Successfully deleted classifier with ID {my_classifier.classifier_id}"
    )

async delete_document_model(model_id: str, **kwargs: Any) → None [source]¶

Delete a custom document model.

Parameters: model_id (str) – Model identifier.
Returns: None
Return type: None
Raises: HttpResponseError or ResourceNotFoundError –

Example:

Delete a model.¶

await document_model_admin_client.delete_document_model(
    model_id=my_model.model_id
)

try:
    await document_model_admin_client.get_document_model(
        model_id=my_model.model_id
    )
except ResourceNotFoundError:
    print(f"Successfully deleted model with ID {my_model.model_id}")

async get_copy_authorization(**kwargs: Any) → TargetAuthorization[source]¶

Generate authorization for copying a custom model into the target Form Recognizer resource.

This should be called by the target resource (where the model will be copied to) and the output can be passed as the target parameter into begin_copy_document_model_to().

Keyword Arguments

model_id (str) – A unique ID for your copied model. If not specified, a model ID will be created for you.
description (str) – An optional description to add to the model.
tags (dict[str, str]) – List of user defined key-value tag attributes associated with the model.

Returns

A dictionary with values necessary for the copy authorization.

Return type

TargetAuthorization

Raises

HttpResponseError

get_document_analysis_client(**kwargs: Any) → azure.ai.formrecognizer.aio._document_analysis_client_async.DocumentAnalysisClient[source]¶

Get an instance of a DocumentAnalysisClient from DocumentModelAdministrationClient.

Return type: DocumentAnalysisClient
Returns: A DocumentAnalysisClient

async get_document_classifier(classifier_id: str, **kwargs: Any) → azure.ai.formrecognizer._models.DocumentClassifierDetails[source]¶

Get a document classifier by its ID.

Parameters: classifier_id (str) – Classifier identifier.
Returns: DocumentClassifierDetails
Return type: DocumentClassifierDetails
Raises: HttpResponseError or ResourceNotFoundError –

New in version 2023-07-31: The get_document_classifier client method.

Example:

Get a classifier by its ID.¶

my_classifier = await document_model_admin_client.get_document_classifier(
    classifier_id=classifier_model.classifier_id
)
print(f"\nClassifier ID: {my_classifier.classifier_id}")
print(f"Description: {my_classifier.description}")
print(f"Classifier created on: {my_classifier.created_on}")

async get_document_model(model_id: str, **kwargs: Any) → azure.ai.formrecognizer._models.DocumentModelDetails[source]¶

Get a document model by its ID.

Parameters: model_id (str) – Model identifier.
Returns: DocumentModelDetails
Return type: DocumentModelDetails
Raises: HttpResponseError or ResourceNotFoundError –

Example:

Get a model by its ID.¶

my_model = await document_model_admin_client.get_document_model(
    model_id=model.model_id
)
print(f"\nModel ID: {my_model.model_id}")
print(f"Description: {my_model.description}")
print(f"Model created on: {my_model.created_on}")
print(f"Model expires on: {my_model.expires_on}")

async get_operation(operation_id: str, **kwargs: Any) → azure.ai.formrecognizer._models.OperationDetails[source]¶

Get an operation by its ID.

Get an operation associated with the Form Recognizer resource. Note that operation information only persists for 24 hours. If the document model operation was successful, the model can be accessed using the get_document_model() or list_document_models() APIs.

Parameters: operation_id (str) – The operation ID.
Returns: OperationDetails
Return type: OperationDetails
Raises: HttpResponseError –

Example:

Get a document model operation by its ID.¶

# Get an operation by ID
try:
    first_operation = await operations.__anext__()

    print(f"\nGetting operation info by ID: {first_operation.operation_id}")
    operation_info = await document_model_admin_client.get_operation(
        first_operation.operation_id
    )
    if operation_info.status == "succeeded":
        print(f"My {operation_info.kind} operation is completed.")
        result = operation_info.result
        if result is not None:
            if operation_info.kind == "documentClassifierBuild":
                print(f"Classifier ID: {result.classifier_id}")
            else:
                print(f"Model ID: {result.model_id}")
    elif operation_info.status == "failed":
        print(f"My {operation_info.kind} operation failed.")
        error = operation_info.error
        if error is not None:
            print(f"{error.code}: {error.message}")
    else:
        print(f"My operation status is {operation_info.status}")
except StopAsyncIteration:
    print("No operations found.")

async get_resource_details(**kwargs: Any) → azure.ai.formrecognizer._models.ResourceDetails[source]¶

Get information about the models under the Form Recognizer resource.

Returns: Summary of custom models under the resource - model count and limit.
Return type: ResourceDetails
Raises: HttpResponseError –

Example:

Get model counts and limits under the Form Recognizer resource.¶

document_model_admin_client = DocumentModelAdministrationClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

async with document_model_admin_client:
    account_details = await document_model_admin_client.get_resource_details()
    print(
        f"Our resource has {account_details.custom_document_models.count} custom models, "
        f"and we can have at most {account_details.custom_document_models.limit} custom models"
    )
    neural_models = account_details.neural_document_model_quota
    print(
        f"The quota limit for custom neural document models is {neural_models.quota} and the resource has"
        f"used {neural_models.used}. The resource quota will reset on {neural_models.quota_resets_on}"
    )

list_document_classifiers(**kwargs: Any) → azure.core.async_paging.AsyncItemPaged[azure.ai.formrecognizer._models.DocumentClassifierDetails][source]¶

List information for each document classifier, including its classifier ID, description, and when it was created.

Returns: Pageable of DocumentClassifierDetails.
Return type: AsyncItemPaged[DocumentClassifierDetails]
Raises: HttpResponseError –

New in version 2023-07-31: The list_document_classifiers client method.

Example:

List all classifiers that were built successfully under the Form Recognizer resource.¶

classifiers = document_model_admin_client.list_document_classifiers()

print("We have the following 'ready' models with IDs and descriptions:")
async for classifier in classifiers:
    print(f"{classifier.classifier_id} | {classifier.description}")

list_document_models(**kwargs: Any) → azure.core.async_paging.AsyncItemPaged[azure.ai.formrecognizer._models.DocumentModelSummary][source]¶

List information for each model, including its model ID, description, and when it was created.

Returns: Pageable of DocumentModelSummary.
Return type: AsyncItemPaged[DocumentModelSummary]
Raises: HttpResponseError –

Example:

List all models that were built successfully under the Form Recognizer resource.¶

models = document_model_admin_client.list_document_models()

print("We have the following 'ready' models with IDs and descriptions:")
async for model in models:
    print(f"{model.model_id} | {model.description}")

list_operations(**kwargs: Any) → azure.core.async_paging.AsyncItemPaged[azure.ai.formrecognizer._models.OperationSummary][source]¶

List information for each operation.

Lists all operations associated with the Form Recognizer resource. Note that operation information only persists for 24 hours. If the document model operation was successful, the document model can be accessed using the get_document_model() or list_document_models() APIs.

Returns: A pageable of OperationSummary.
Return type: AsyncItemPaged[OperationSummary]
Raises: HttpResponseError –

Example:

List all document model operations in the past 24 hours.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import DocumentModelAdministrationClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

document_model_admin_client = DocumentModelAdministrationClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

async with document_model_admin_client:
    operations = document_model_admin_client.list_operations()

    print("The following document model operations exist under my resource:")
    async for operation in operations:
        print(f"\nOperation ID: {operation.operation_id}")
        print(f"Operation kind: {operation.kind}")
        print(f"Operation status: {operation.status}")
        print(f"Operation percent completed: {operation.percent_completed}")
        print(f"Operation created on: {operation.created_on}")
        print(f"Operation last updated on: {operation.last_updated_on}")
        print(
            f"Resource location of successful operation: {operation.resource_location}"
        )

class azure.ai.formrecognizer.aio.FormRecognizerClient(endpoint: str, credential: Union[azure.core.credentials.AzureKeyCredential, azure.core.credentials_async.AsyncTokenCredential], **kwargs: Any)[source]¶

FormRecognizerClient extracts information from forms and images into structured data. It is the interface to use for analyzing with prebuilt models (receipts, business cards, invoices, identity documents), recognizing content/layout from forms, and analyzing custom forms from trained models. It provides different methods based on inputs from a URL and inputs from a stream.

Note

FormRecognizerClient should be used with API versions <=v2.1. To use API versions 2022-08-31 and up, instantiate a DocumentAnalysisClient.

Parameters

endpoint (str) – Supported Cognitive Services endpoints (protocol and hostname, for example: https://westus2.api.cognitive.microsoft.com).
credential (AzureKeyCredential or AsyncTokenCredential) – Credentials needed for the client to connect to Azure. This is an instance of AzureKeyCredential if using an API key or a token credential from azure.identity.

Keyword Arguments

api_version (str or FormRecognizerApiVersion) – The API version of the service to use for requests. It defaults to API version v2.1. Setting to an older version may result in reduced feature compatibility. To use the latest supported API version and features, instantiate a DocumentAnalysisClient instead.

Example:

Creating the FormRecognizerClient with an endpoint and API key.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

form_recognizer_client = FormRecognizerClient(endpoint, AzureKeyCredential(key))

Creating the FormRecognizerClient with a token credential.¶

"""DefaultAzureCredential will use the values from these environment
variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
"""
from azure.ai.formrecognizer.aio import FormRecognizerClient
from azure.identity.aio import DefaultAzureCredential

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
credential = DefaultAzureCredential()

form_recognizer_client = FormRecognizerClient(endpoint, credential)

async begin_recognize_business_cards(business_card: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given business card. The input document must be of one of the supported content types - ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’ or ‘image/bmp’.

See fields found on a business card here: https://aka.ms/formrecognizer/businesscardfields

Parameters

business_card (bytes or IO[bytes]) – JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.

Keyword Arguments

locale (str) – Locale of the business card. Supported locales include: en-US, en-AU, en-CA, en-GB, and en-IN.
include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
content_type (str or FormContentType) – Content-type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see FormContentType.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=["1-3", "5-6"]. Separate each page number or range with a comma.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The begin_recognize_business_cards client method

Example:

Recognize business cards from a file.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

form_recognizer_client = FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)
async with form_recognizer_client:
    with open(path_to_sample_forms, "rb") as f:
        poller = await form_recognizer_client.begin_recognize_business_cards(business_card=f, locale="en-US")
    business_cards = await poller.result()

for idx, business_card in enumerate(business_cards):
    print("--------Recognizing business card #{}--------".format(idx+1))
    contact_names = business_card.fields.get("ContactNames")
    if contact_names:
        for contact_name in contact_names.value:
            print("Contact First Name: {} has confidence: {}".format(
                contact_name.value["FirstName"].value, contact_name.value["FirstName"].confidence
            ))
            print("Contact Last Name: {} has confidence: {}".format(
                contact_name.value["LastName"].value, contact_name.value["LastName"].confidence
            ))
    company_names = business_card.fields.get("CompanyNames")
    if company_names:
        for company_name in company_names.value:
            print("Company Name: {} has confidence: {}".format(company_name.value, company_name.confidence))
    departments = business_card.fields.get("Departments")
    if departments:
        for department in departments.value:
            print("Department: {} has confidence: {}".format(department.value, department.confidence))
    job_titles = business_card.fields.get("JobTitles")
    if job_titles:
        for job_title in job_titles.value:
            print("Job Title: {} has confidence: {}".format(job_title.value, job_title.confidence))
    emails = business_card.fields.get("Emails")
    if emails:
        for email in emails.value:
            print("Email: {} has confidence: {}".format(email.value, email.confidence))
    websites = business_card.fields.get("Websites")
    if websites:
        for website in websites.value:
            print("Website: {} has confidence: {}".format(website.value, website.confidence))
    addresses = business_card.fields.get("Addresses")
    if addresses:
        for address in addresses.value:
            print("Address: {} has confidence: {}".format(address.value, address.confidence))
    mobile_phones = business_card.fields.get("MobilePhones")
    if mobile_phones:
        for phone in mobile_phones.value:
            print("Mobile phone number: {} has confidence: {}".format(phone.value, phone.confidence))
    faxes = business_card.fields.get("Faxes")
    if faxes:
        for fax in faxes.value:
            print("Fax number: {} has confidence: {}".format(fax.value, fax.confidence))
    work_phones = business_card.fields.get("WorkPhones")
    if work_phones:
        for work_phone in work_phones.value:
            print("Work phone number: {} has confidence: {}".format(work_phone.value, work_phone.confidence))
    other_phones = business_card.fields.get("OtherPhones")
    if other_phones:
        for other_phone in other_phones.value:
            print("Other phone number: {} has confidence: {}".format(other_phone.value, other_phone.confidence))

async begin_recognize_business_cards_from_url(business_card_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given business card. The input document must be the location (URL) of the card to be analyzed.

See fields found on a business card here: https://aka.ms/formrecognizer/businesscardfields

Parameters

business_card_url (str) – The URL of the business card to analyze. The input must be a valid, encoded URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.

Keyword Arguments

locale (str) – Locale of the business card. Supported locales include: en-US, en-AU, en-CA, en-GB, and en-IN.
include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=["1-3", "5-6"]. Separate each page number or range with a comma.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The begin_recognize_business_cards_from_url client method

async begin_recognize_content(form: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.FormPage]][source]¶

Extract text and content/layout information from a given document. The input document must be of one of the supported content types - ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’ or ‘image/bmp’.

Parameters

form (bytes or IO[bytes]) – JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.

Keyword Arguments

pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=["1-3", "5-6"]. Separate each page number or range with a comma.
language (str) – The BCP-47 language code of the text in the document. See supported language codes here: https://docs.microsoft.com/azure/cognitive-services/form-recognizer/language-support. Content supports auto language identification and multilanguage documents, so only provide a language code if you would like to force the document to be processed as that specific language.
reading_order (str) – Reading order algorithm to sort the text lines returned. Supported reading orders include: basic (default), natural. Set ‘basic’ to sort lines left to right and top to bottom, although in some cases proximity is treated with higher priority. Set ‘natural’ to sort lines by using positional information to keep nearby lines together.
content_type (str or FormContentType) – Content-type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see FormContentType.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[FormPage].

Return type

AsyncLROPoller[list[FormPage]]

Raises

HttpResponseError

New in version v2.1: The pages, language and reading_order keyword arguments and support for image/bmp content

Example:

Recognize text and content/layout information from a form.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

async with FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_recognizer_client:

    with open(path_to_sample_forms, "rb") as f:
        poller = await form_recognizer_client.begin_recognize_content(form=f)

    form_pages = await poller.result()

    for idx, content in enumerate(form_pages):
        print("----Recognizing content from page #{}----".format(idx+1))
        print("Page has width: {} and height: {}, measured with unit: {}".format(
            content.width,
            content.height,
            content.unit
        ))
        for table_idx, table in enumerate(content.tables):
            print("Table # {} has {} rows and {} columns".format(table_idx, table.row_count, table.column_count))
            print("Table # {} location on page: {}".format(table_idx, format_bounding_box(table.bounding_box)))
            for cell in table.cells:
                print("...Cell[{}][{}] has text '{}' within bounding box '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.text,
                    format_bounding_box(cell.bounding_box)
                ))

        for line_idx, line in enumerate(content.lines):
            print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
                line_idx,
                len(line.words),
                line.text,
                format_bounding_box(line.bounding_box)
            ))
            if line.appearance:
                if line.appearance.style_name == "handwriting" and line.appearance.style_confidence > 0.8:
                    print("Text line '{}' is handwritten and might be a signature.".format(line.text))
            for word in line.words:
                print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))

        for selection_mark in content.selection_marks:
            print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                selection_mark.state,
                format_bounding_box(selection_mark.bounding_box),
                selection_mark.confidence
            ))
        print("----------------------------------------")

async begin_recognize_content_from_url(form_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.FormPage]][source]¶

Extract text and layout information from a given document. The input document must be the location (URL) of the document to be analyzed.

Parameters

form_url (str) – The URL of the form to analyze. The input must be a valid, encoded URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.

Keyword Arguments

pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=["1-3", "5-6"]. Separate each page number or range with a comma.
language (str) – The BCP-47 language code of the text in the document. See supported language codes here: https://docs.microsoft.com/azure/cognitive-services/form-recognizer/language-support. Content supports auto language identification and multilanguage documents, so only provide a language code if you would like to force the document to be processed as that specific language.
reading_order (str) – Reading order algorithm to sort the text lines returned. Supported reading orders include: basic (default), natural. Set ‘basic’ to sort lines left to right and top to bottom, although in some cases proximity is treated with higher priority. Set ‘natural’ to sort lines by using positional information to keep nearby lines together.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[FormPage].

Return type

AsyncLROPoller[list[FormPage]]

Raises

HttpResponseError

New in version v2.1: The pages, language and reading_order keyword arguments and support for image/bmp content

async begin_recognize_custom_forms(model_id: str, form: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Analyze a custom form with a model trained with or without labels. The form to analyze should be of the same type as the forms that were used to train the model. The input document must be of one of the supported content types - ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’, or ‘image/bmp’.

Parameters

model_id (str) – Custom model identifier.
form (bytes or IO[bytes]) – JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.

Keyword Arguments

include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
content_type (str or FormContentType) – Content-type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see FormContentType.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=["1-3", "5-6"]. Separate each page number or range with a comma.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

Example:

Recognize fields and values from a custom form.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
model_id = os.getenv("CUSTOM_TRAINED_MODEL_ID", custom_model_id)

async with FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_recognizer_client:

    # Make sure your form's type is included in the list of form types the custom model can recognize
    with open(path_to_sample_forms, "rb") as f:
        poller = await form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id, form=f, include_field_elements=True
        )
    forms = await poller.result()

    for idx, form in enumerate(forms):
        print("--------Recognizing Form #{}--------".format(idx+1))
        print("Form has type {}".format(form.form_type))
        print("Form has form type confidence {}".format(form.form_type_confidence))
        print("Form was analyzed with model with ID {}".format(form.model_id))
        for name, field in form.fields.items():
            # each field is of type FormField
            # label_data is populated if you are using a model trained without labels,
            # since the service needs to make predictions for labels if not explicitly given to it.
            if field.label_data:
                print("...Field '{}' has label '{}' with a confidence score of {}".format(
                    name,
                    field.label_data.text,
                    field.confidence
                ))

            print("...Label '{}' has value '{}' with a confidence score of {}".format(
                field.label_data.text if field.label_data else name, field.value, field.confidence
            ))

        # iterate over tables, lines, and selection marks on each page
        for page in form.pages:
            for i, table in enumerate(page.tables):
                print("\nTable {} on page {}".format(i + 1, table.page_number))
                for cell in table.cells:
                    print("...Cell[{}][{}] has text '{}' with confidence {}".format(
                        cell.row_index, cell.column_index, cell.text, cell.confidence
                    ))
            print("\nLines found on page {}".format(page.page_number))
            for line in page.lines:
                print("...Line '{}' is made up of the following words: ".format(line.text))
                for word in line.words:
                    print("......Word '{}' has a confidence of {}".format(
                        word.text,
                        word.confidence
                    ))
            if page.selection_marks:
                print("\nSelection marks found on page {}".format(page.page_number))
                for selection_mark in page.selection_marks:
                    print("......Selection mark is '{}' and has a confidence of {}".format(
                        selection_mark.state,
                        selection_mark.confidence
                    ))

        print("-----------------------------------")

async begin_recognize_custom_forms_from_url(model_id: str, form_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Analyze a custom form with a model trained with or without labels. The form to analyze should be of the same type as the forms that were used to train the model. The input document must be the location (URL) of the document to be analyzed.

Parameters

model_id (str) – Custom model identifier.
form_url (str) – The URL of the form to analyze. The input must be a valid, encoded URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.

Keyword Arguments

include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

async begin_recognize_identity_documents(identity_document: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given identity document. The input document must be of one of the supported content types - ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’ or ‘image/bmp’.

See fields found on an identity document here: https://aka.ms/formrecognizer/iddocumentfields

Parameters

identity_document (bytes or IO[bytes]) – JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.

Keyword Arguments

include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
content_type (str or FormContentType) – Content-type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see FormContentType.
continuation_token (str) – A continuation token to restart a poller from a saved state.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The begin_recognize_identity_documents client method

Example:

Recognize identity documents from a file.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

async with FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_recognizer_client:
    
    with open(path_to_sample_forms, "rb") as f:
        poller = await form_recognizer_client.begin_recognize_identity_documents(identity_document=f)
    
    id_documents = await poller.result()

    for idx, id_document in enumerate(id_documents):
        print("--------Recognizing ID document #{}--------".format(idx+1))
        first_name = id_document.fields.get("FirstName")
        if first_name:
            print("First Name: {} has confidence: {}".format(first_name.value, first_name.confidence))
        last_name = id_document.fields.get("LastName")
        if last_name:
            print("Last Name: {} has confidence: {}".format(last_name.value, last_name.confidence))
        document_number = id_document.fields.get("DocumentNumber")
        if document_number:
            print("Document Number: {} has confidence: {}".format(document_number.value, document_number.confidence))
        dob = id_document.fields.get("DateOfBirth")
        if dob:
            print("Date of Birth: {} has confidence: {}".format(dob.value, dob.confidence))
        doe = id_document.fields.get("DateOfExpiration")
        if doe:
            print("Date of Expiration: {} has confidence: {}".format(doe.value, doe.confidence))
        sex = id_document.fields.get("Sex")
        if sex:
            print("Sex: {} has confidence: {}".format(sex.value, sex.confidence))
        address = id_document.fields.get("Address")
        if address:
            print("Address: {} has confidence: {}".format(address.value, address.confidence))
        country_region = id_document.fields.get("CountryRegion")
        if country_region:
            print("Country/Region: {} has confidence: {}".format(country_region.value, country_region.confidence))
        region = id_document.fields.get("Region")
        if region:
            print("Region: {} has confidence: {}".format(region.value, region.confidence))

async begin_recognize_identity_documents_from_url(identity_document_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given identity document. The input document must be the location (URL) of the identity document to be analyzed.

See fields found on an identity document here: https://aka.ms/formrecognizer/iddocumentfields

Parameters

identity_document_url (str) – The URL of the identity document to analyze. The input must be a valid, encoded URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.

Keyword Arguments

include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
continuation_token (str) – A continuation token to restart a poller from a saved state.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The begin_recognize_identity_documents_from_url client method

async begin_recognize_invoices(invoice: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given invoice. The input document must be of one of the supported content types - ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’ or ‘image/bmp’.

See fields found on an invoice here: https://aka.ms/formrecognizer/invoicefields

Parameters

invoice (bytes or IO[bytes]) – JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.

Keyword Arguments

locale (str) – Locale of the invoice. Supported locales include: en-US
include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
content_type (str or FormContentType) – Content-type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see FormContentType.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The begin_recognize_invoices client method

Example:

Recognize invoices from a file.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

async with FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_recognizer_client:
    with open(path_to_sample_forms, "rb") as f:
        poller = await form_recognizer_client.begin_recognize_invoices(invoice=f, locale="en-US")
    invoices = await poller.result()

    for idx, invoice in enumerate(invoices):
        print("--------Recognizing invoice #{}--------".format(idx+1))
        vendor_name = invoice.fields.get("VendorName")
        if vendor_name:
            print("Vendor Name: {} has confidence: {}".format(vendor_name.value, vendor_name.confidence))
        vendor_address = invoice.fields.get("VendorAddress")
        if vendor_address:
            print("Vendor Address: {} has confidence: {}".format(vendor_address.value, vendor_address.confidence))
        vendor_address_recipient = invoice.fields.get("VendorAddressRecipient")
        if vendor_address_recipient:
            print("Vendor Address Recipient: {} has confidence: {}".format(vendor_address_recipient.value, vendor_address_recipient.confidence))
        customer_name = invoice.fields.get("CustomerName")
        if customer_name:
            print("Customer Name: {} has confidence: {}".format(customer_name.value, customer_name.confidence))
        customer_id = invoice.fields.get("CustomerId")
        if customer_id:
            print("Customer Id: {} has confidence: {}".format(customer_id.value, customer_id.confidence))
        customer_address = invoice.fields.get("CustomerAddress")
        if customer_address:
            print("Customer Address: {} has confidence: {}".format(customer_address.value, customer_address.confidence))
        customer_address_recipient = invoice.fields.get("CustomerAddressRecipient")
        if customer_address_recipient:
            print("Customer Address Recipient: {} has confidence: {}".format(customer_address_recipient.value, customer_address_recipient.confidence))
        invoice_id = invoice.fields.get("InvoiceId")
        if invoice_id:
            print("Invoice Id: {} has confidence: {}".format(invoice_id.value, invoice_id.confidence))
        invoice_date = invoice.fields.get("InvoiceDate")
        if invoice_date:
            print("Invoice Date: {} has confidence: {}".format(invoice_date.value, invoice_date.confidence))
        invoice_total = invoice.fields.get("InvoiceTotal")
        if invoice_total:
            print("Invoice Total: {} has confidence: {}".format(invoice_total.value, invoice_total.confidence))
        due_date = invoice.fields.get("DueDate")
        if due_date:
            print("Due Date: {} has confidence: {}".format(due_date.value, due_date.confidence))
        purchase_order = invoice.fields.get("PurchaseOrder")
        if purchase_order:
            print("Purchase Order: {} has confidence: {}".format(purchase_order.value, purchase_order.confidence))
        billing_address = invoice.fields.get("BillingAddress")
        if billing_address:
            print("Billing Address: {} has confidence: {}".format(billing_address.value, billing_address.confidence))
        billing_address_recipient = invoice.fields.get("BillingAddressRecipient")
        if billing_address_recipient:
            print("Billing Address Recipient: {} has confidence: {}".format(billing_address_recipient.value, billing_address_recipient.confidence))
        shipping_address = invoice.fields.get("ShippingAddress")
        if shipping_address:
            print("Shipping Address: {} has confidence: {}".format(shipping_address.value, shipping_address.confidence))
        shipping_address_recipient = invoice.fields.get("ShippingAddressRecipient")
        if shipping_address_recipient:
            print("Shipping Address Recipient: {} has confidence: {}".format(shipping_address_recipient.value, shipping_address_recipient.confidence))
        print("Invoice items:")
        for idx, item in enumerate(invoice.fields.get("Items").value):
            print("...Item #{}".format(idx+1))
            item_description = item.value.get("Description")
            if item_description:
                print("......Description: {} has confidence: {}".format(item_description.value, item_description.confidence))
            item_quantity = item.value.get("Quantity")
            if item_quantity:
                print("......Quantity: {} has confidence: {}".format(item_quantity.value, item_quantity.confidence))
            unit = item.value.get("Unit")
            if unit:
                print("......Unit: {} has confidence: {}".format(unit.value, unit.confidence))
            unit_price = item.value.get("UnitPrice")
            if unit_price:
                print("......Unit Price: {} has confidence: {}".format(unit_price.value, unit_price.confidence))
            product_code = item.value.get("ProductCode")
            if product_code:
                print("......Product Code: {} has confidence: {}".format(product_code.value, product_code.confidence))
            item_date = item.value.get("Date")
            if item_date:
                print("......Date: {} has confidence: {}".format(item_date.value, item_date.confidence))
            tax = item.value.get("Tax")
            if tax:
                print("......Tax: {} has confidence: {}".format(tax.value, tax.confidence))
            amount = item.value.get("Amount")
            if amount:
                print("......Amount: {} has confidence: {}".format(amount.value, amount.confidence))
        subtotal = invoice.fields.get("SubTotal")
        if subtotal:
            print("Subtotal: {} has confidence: {}".format(subtotal.value, subtotal.confidence))
        total_tax = invoice.fields.get("TotalTax")
        if total_tax:
            print("Total Tax: {} has confidence: {}".format(total_tax.value, total_tax.confidence))
        previous_unpaid_balance = invoice.fields.get("PreviousUnpaidBalance")
        if previous_unpaid_balance:
            print("Previous Unpaid Balance: {} has confidence: {}".format(previous_unpaid_balance.value, previous_unpaid_balance.confidence))
        amount_due = invoice.fields.get("AmountDue")
        if amount_due:
            print("Amount Due: {} has confidence: {}".format(amount_due.value, amount_due.confidence))
        service_start_date = invoice.fields.get("ServiceStartDate")
        if service_start_date:
            print("Service Start Date: {} has confidence: {}".format(service_start_date.value, service_start_date.confidence))
        service_end_date = invoice.fields.get("ServiceEndDate")
        if service_end_date:
            print("Service End Date: {} has confidence: {}".format(service_end_date.value, service_end_date.confidence))
        service_address = invoice.fields.get("ServiceAddress")
        if service_address:
            print("Service Address: {} has confidence: {}".format(service_address.value, service_address.confidence))
        service_address_recipient = invoice.fields.get("ServiceAddressRecipient")
        if service_address_recipient:
            print("Service Address Recipient: {} has confidence: {}".format(service_address_recipient.value, service_address_recipient.confidence))
        remittance_address = invoice.fields.get("RemittanceAddress")
        if remittance_address:
            print("Remittance Address: {} has confidence: {}".format(remittance_address.value, remittance_address.confidence))
        remittance_address_recipient = invoice.fields.get("RemittanceAddressRecipient")
        if remittance_address_recipient:
            print("Remittance Address Recipient: {} has confidence: {}".format(remittance_address_recipient.value, remittance_address_recipient.confidence))

async begin_recognize_invoices_from_url(invoice_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given invoice. The input document must be the location (URL) of the invoice to be analyzed.

See fields found on an invoice here: https://aka.ms/formrecognizer/invoicefields

Parameters

invoice_url (str) – The URL of the invoice to analyze. The input must be a valid, encoded URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.

Keyword Arguments

locale (str) – Locale of the invoice. Supported locales include: en-US
include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The begin_recognize_invoices_from_url client method

async begin_recognize_receipts(receipt: Union[bytes, IO[bytes]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given sales receipt. The input document must be of one of the supported content types - ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’ or ‘image/bmp’.

See fields found on a receipt here: https://aka.ms/formrecognizer/receiptfields

Parameters

receipt (bytes or IO[bytes]) – JPEG, PNG, PDF, TIFF, or BMP type file stream or bytes.

Keyword Arguments

include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
content_type (str or FormContentType) – Content-type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see FormContentType.
continuation_token (str) – A continuation token to restart a poller from a saved state.
locale (str) – Locale of the receipt. Supported locales include: en-US, en-AU, en-CA, en-GB, and en-IN.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The locale keyword argument and support for image/bmp content

Example:

Recognize sales receipt fields.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

async with FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_recognizer_client:

    with open(path_to_sample_forms, "rb") as f:
        poller = await form_recognizer_client.begin_recognize_receipts(receipt=f, locale="en-US")

    receipts = await poller.result()

    for idx, receipt in enumerate(receipts):
        print("--------Recognizing receipt #{}--------".format(idx+1))
        receipt_type = receipt.fields.get("ReceiptType")
        if receipt_type:
            print("Receipt Type: {} has confidence: {}".format(receipt_type.value, receipt_type.confidence))
        merchant_name = receipt.fields.get("MerchantName")
        if merchant_name:
            print("Merchant Name: {} has confidence: {}".format(merchant_name.value, merchant_name.confidence))
        transaction_date = receipt.fields.get("TransactionDate")
        if transaction_date:
            print("Transaction Date: {} has confidence: {}".format(transaction_date.value, transaction_date.confidence))
        if receipt.fields.get("Items"):
            print("Receipt items:")
            for idx, item in enumerate(receipt.fields.get("Items").value):
                print("...Item #{}".format(idx+1))
                item_name = item.value.get("Name")
                if item_name:
                    print("......Item Name: {} has confidence: {}".format(item_name.value, item_name.confidence))
                item_quantity = item.value.get("Quantity")
                if item_quantity:
                    print("......Item Quantity: {} has confidence: {}".format(item_quantity.value, item_quantity.confidence))
                item_price = item.value.get("Price")
                if item_price:
                    print("......Individual Item Price: {} has confidence: {}".format(item_price.value, item_price.confidence))
                item_total_price = item.value.get("TotalPrice")
                if item_total_price:
                    print("......Total Item Price: {} has confidence: {}".format(item_total_price.value, item_total_price.confidence))
        subtotal = receipt.fields.get("Subtotal")
        if subtotal:
            print("Subtotal: {} has confidence: {}".format(subtotal.value, subtotal.confidence))
        tax = receipt.fields.get("Tax")
        if tax:
            print("Tax: {} has confidence: {}".format(tax.value, tax.confidence))
        tip = receipt.fields.get("Tip")
        if tip:
            print("Tip: {} has confidence: {}".format(tip.value, tip.confidence))
        total = receipt.fields.get("Total")
        if total:
            print("Total: {} has confidence: {}".format(total.value, total.confidence))
        print("--------------------------------------")

async begin_recognize_receipts_from_url(receipt_url: str, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[List[azure.ai.formrecognizer._models.RecognizedForm]][source]¶

Extract field text and semantic values from a given sales receipt. The input document must be the location (URL) of the receipt to be analyzed.

See fields found on a receipt here: https://aka.ms/formrecognizer/receiptfields

Parameters

receipt_url (str) – The URL of the receipt to analyze. The input must be a valid, encoded URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP.

Keyword Arguments

include_field_elements (bool) – Whether or not to include all lines per page and field elements such as lines, words, and selection marks for each form field.
continuation_token (str) – A continuation token to restart a poller from a saved state.
locale (str) – Locale of the receipt. Supported locales include: en-US, en-AU, en-CA, en-GB, and en-IN.
pages (list[str]) – Custom page numbers for multi-page documents (PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages=[“1-3”, “5-6”]. Separate each page number or range with a comma.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a list[RecognizedForm].

Return type

AsyncLROPoller[list[RecognizedForm]]

Raises

HttpResponseError

New in version v2.1: The locale keyword argument and support for image/bmp content

Example:

Recognize sales receipt fields from a URL.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

async with FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_recognizer_client:
    url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
    poller = await form_recognizer_client.begin_recognize_receipts_from_url(receipt_url=url)
    receipts = await poller.result()

    for idx, receipt in enumerate(receipts):
        print("--------Recognizing receipt #{}--------".format(idx+1))
        receipt_type = receipt.fields.get("ReceiptType")
        if receipt_type:
            print("Receipt Type: {} has confidence: {}".format(receipt_type.value, receipt_type.confidence))
        merchant_name = receipt.fields.get("MerchantName")
        if merchant_name:
            print("Merchant Name: {} has confidence: {}".format(merchant_name.value, merchant_name.confidence))
        transaction_date = receipt.fields.get("TransactionDate")
        if transaction_date:
            print("Transaction Date: {} has confidence: {}".format(transaction_date.value, transaction_date.confidence))
        if receipt.fields.get("Items"):
            print("Receipt items:")
            for idx, item in enumerate(receipt.fields.get("Items").value):
                print("...Item #{}".format(idx+1))
                item_name = item.value.get("Name")
                if item_name:
                    print("......Item Name: {} has confidence: {}".format(item_name.value, item_name.confidence))
                item_quantity = item.value.get("Quantity")
                if item_quantity:
                    print("......Item Quantity: {} has confidence: {}".format(item_quantity.value, item_quantity.confidence))
                item_price = item.value.get("Price")
                if item_price:
                    print("......Individual Item Price: {} has confidence: {}".format(item_price.value, item_price.confidence))
                item_total_price = item.value.get("TotalPrice")
                if item_total_price:
                    print("......Total Item Price: {} has confidence: {}".format(item_total_price.value, item_total_price.confidence))
        subtotal = receipt.fields.get("Subtotal")
        if subtotal:
            print("Subtotal: {} has confidence: {}".format(subtotal.value, subtotal.confidence))
        tax = receipt.fields.get("Tax")
        if tax:
            print("Tax: {} has confidence: {}".format(tax.value, tax.confidence))
        tip = receipt.fields.get("Tip")
        if tip:
            print("Tip: {} has confidence: {}".format(tip.value, tip.confidence))
        total = receipt.fields.get("Total")
        if total:
            print("Total: {} has confidence: {}".format(total.value, total.confidence))
        print("--------------------------------------")

async close() → None[source]¶

Close the FormRecognizerClient session.

class azure.ai.formrecognizer.aio.FormTrainingClient(endpoint: str, credential: Union[azure.core.credentials.AzureKeyCredential, azure.core.credentials_async.AsyncTokenCredential], **kwargs: Any)[source]¶

FormTrainingClient is the Form Recognizer interface to use for creating and managing custom models. It provides methods for training models on the forms you provide, as well as methods for viewing and deleting models, accessing account properties, copying models to another Form Recognizer resource, and composing models from a collection of existing models trained with labels.

Note

FormTrainingClient should be used with API versions <=v2.1. To use API versions 2022-08-31 and up, instantiate a DocumentModelAdministrationClient.

Parameters

endpoint (str) – Supported Cognitive Services endpoints (protocol and hostname, for example: https://westus2.api.cognitive.microsoft.com).
credential (AzureKeyCredential or AsyncTokenCredential) – Credentials needed for the client to connect to Azure. This is an instance of AzureKeyCredential if using an API key or a token credential from azure.identity.

Keyword Arguments

api_version (str or FormRecognizerApiVersion) – The API version of the service to use for requests. It defaults to API version v2.1. Setting to an older version may result in reduced feature compatibility. To use the latest supported API version and features, instantiate a DocumentModelAdministrationClient instead.

Example:

Creating the FormTrainingClient with an endpoint and API key.¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormTrainingClient
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

form_training_client = FormTrainingClient(endpoint, AzureKeyCredential(key))

Creating the FormTrainingClient with a token credential.¶

"""DefaultAzureCredential will use the values from these environment
variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
"""
from azure.ai.formrecognizer.aio import FormTrainingClient
from azure.identity.aio import DefaultAzureCredential

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
credential = DefaultAzureCredential()

form_training_client = FormTrainingClient(endpoint, credential)

async begin_copy_model(model_id: str, target: Dict[str, Union[str, int]], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[azure.ai.formrecognizer._models.CustomFormModelInfo][source]¶

Copy a custom model stored in this resource (the source) to the user specified target Form Recognizer resource. This should be called with the source Form Recognizer resource (with the model that is intended to be copied). The target parameter should be supplied from the target resource’s output from calling the get_copy_authorization() method.

Parameters

model_id (str) – Model identifier of the model to copy to target resource.
target (Dict[str, Union[str, int]]) – The copy authorization generated from the target resource’s call to get_copy_authorization().

Keyword Arguments

continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a CustomFormModelInfo.

Return type

AsyncLROPoller[CustomFormModelInfo]

Raises

HttpResponseError

Example:

Copy a model from the source resource to the target resource¶

source_client = FormTrainingClient(endpoint=source_endpoint, credential=AzureKeyCredential(source_key))

async with source_client:
    poller = await source_client.begin_copy_model(
        model_id=source_model_id,
        target=target  # output from target client's call to get_copy_authorization()
    )
    copied_over_model = await poller.result()

    print("Model ID: {}".format(copied_over_model.model_id))
    print("Status: {}".format(copied_over_model.status))

async begin_create_composed_model(model_ids: List[str], **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[azure.ai.formrecognizer._models.CustomFormModel][source]¶

Creates a composed model from a collection of existing models that were trained with labels.

A composed model allows multiple models to be called with a single model ID. When a document is submitted to be analyzed with a composed model ID, a classification step is first performed to route it to the correct custom model.

Parameters

model_ids (list[str]) – List of model IDs to use in the composed model.

Keyword Arguments

model_name (str) – An optional, user-defined name to associate with your model.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a CustomFormModel.

Return type

AsyncLROPoller[CustomFormModel]

Raises

HttpResponseError

New in version v2.1: The begin_create_composed_model client method

Example:

Create a composed model¶

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormTrainingClient

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
po_supplies = os.environ['PURCHASE_ORDER_OFFICE_SUPPLIES_SAS_URL_V2']
po_equipment = os.environ['PURCHASE_ORDER_OFFICE_EQUIPMENT_SAS_URL_V2']
po_furniture = os.environ['PURCHASE_ORDER_OFFICE_FURNITURE_SAS_URL_V2']
po_cleaning_supplies = os.environ['PURCHASE_ORDER_OFFICE_CLEANING_SUPPLIES_SAS_URL_V2']

form_training_client = FormTrainingClient(endpoint=endpoint, credential=AzureKeyCredential(key))
async with form_training_client:
    supplies_poller = await form_training_client.begin_training(
        po_supplies, use_training_labels=True, model_name="Purchase order - Office supplies"
    )
    equipment_poller = await form_training_client.begin_training(
        po_equipment, use_training_labels=True, model_name="Purchase order - Office Equipment"
    )
    furniture_poller = await form_training_client.begin_training(
        po_furniture, use_training_labels=True, model_name="Purchase order - Furniture"
    )
    cleaning_supplies_poller = await form_training_client.begin_training(
        po_cleaning_supplies, use_training_labels=True, model_name="Purchase order - Cleaning Supplies"
    )
    supplies_model = await supplies_poller.result()
    equipment_model = await equipment_poller.result()
    furniture_model = await furniture_poller.result()
    cleaning_supplies_model = await cleaning_supplies_poller.result()

    models_trained_with_labels = [
        supplies_model.model_id,
        equipment_model.model_id,
        furniture_model.model_id,
        cleaning_supplies_model.model_id
    ]

    poller = await form_training_client.begin_create_composed_model(
        models_trained_with_labels, model_name="Office Supplies Composed Model"
    )
    model = await poller.result()

print("Office Supplies Composed Model Info:")
print("Model ID: {}".format(model.model_id))
print("Model name: {}".format(model.model_name))
print("Is this a composed model?: {}".format(model.properties.is_composed_model))
print("Status: {}".format(model.status))
print("Composed model creation started on: {}".format(model.training_started_on))
print("Creation completed on: {}".format(model.training_completed_on))

async begin_training(training_files_url: str, use_training_labels: bool, **kwargs: Any) → azure.core.polling._async_poller.AsyncLROPoller[azure.ai.formrecognizer._models.CustomFormModel][source]¶

Create and train a custom model. The request must include a training_files_url parameter that is an externally accessible Azure storage blob container URI (preferably a Shared Access Signature URI). Note that a container URI (without SAS) is accepted only when the container is public or has a managed identity configured; see more about configuring managed identities to work with Form Recognizer here: https://docs.microsoft.com/azure/applied-ai-services/form-recognizer/managed-identities. Models are trained using documents of the following content types: ‘application/pdf’, ‘image/jpeg’, ‘image/png’, ‘image/tiff’, or ‘image/bmp’. Other types of content in the container are ignored.

Parameters

training_files_url (str) – An Azure Storage blob container’s SAS URI. A container URI (without SAS) can be used if the container is public or has a managed identity configured. For more information on setting up a training data set, see: https://aka.ms/azsdk/formrecognizer/buildtrainingset.
use_training_labels (bool) – Whether to train with labels or not. Corresponding labeled files must exist in the blob container if set to True.

Keyword Arguments

prefix (str) – A case-sensitive prefix string to filter documents in the source path for training. For example, when using an Azure storage blob URI, use the prefix to restrict sub folders for training.
include_subfolders (bool) – A flag to indicate if subfolders within the set of prefix folders will also need to be included when searching for content to be preprocessed. Not supported if training with labels.
model_name (str) – An optional, user-defined name to associate with your model.
continuation_token (str) – A continuation token to restart a poller from a saved state.

Returns

An instance of an AsyncLROPoller. Call result() on the poller object to return a CustomFormModel.

Return type

AsyncLROPoller[CustomFormModel]

Raises

HttpResponseError – Note that if the training fails, the exception is raised, but a model with an “invalid” status is still created. You can delete this model by calling delete_model().

New in version v2.1: The model_name keyword argument

Example:

Training a model (without labels) with your custom forms.¶

from azure.ai.formrecognizer.aio import FormTrainingClient
from azure.core.credentials import AzureKeyCredential

endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
container_sas_url = os.environ["CONTAINER_SAS_URL_V2"]

async with FormTrainingClient(
    endpoint, AzureKeyCredential(key)
) as form_training_client:

    poller = await form_training_client.begin_training(container_sas_url, use_training_labels=False)
    model = await poller.result()

    # Custom model information
    print("Model ID: {}".format(model.model_id))
    print("Status: {}".format(model.status))
    print("Model name: {}".format(model.model_name))
    print("Training started on: {}".format(model.training_started_on))
    print("Training completed on: {}".format(model.training_completed_on))

    print("Recognized fields:")
    # Looping through the submodels, which contain the fields they were trained on
    for submodel in model.submodels:
        print("...The submodel has form type '{}'".format(submodel.form_type))
        for name, field in submodel.fields.items():
            print("...The model found field '{}' to have label '{}'".format(
                name, field.label
            ))

async close() → None [source]¶: Close the FormTrainingClient session.

async delete_model(model_id: str, **kwargs: Any) → None [source]¶

Mark model for deletion. Model artifacts will be permanently removed within a predetermined period.

Parameters: model_id (str) – Model identifier.
Returns: None
Return type: None
Raises: HttpResponseError or ResourceNotFoundError

Example:

Delete a custom model.¶

await form_training_client.delete_model(model_id=custom_model.model_id)

try:
    await form_training_client.get_custom_model(model_id=custom_model.model_id)
except ResourceNotFoundError:
    print("Successfully deleted model with id {}".format(custom_model.model_id))

async get_account_properties(**kwargs: Any) → azure.ai.formrecognizer._models.AccountProperties[source]¶

Get information about the models on the form recognizer account.

Returns: Summary of models on account - custom model count, custom model limit.
Return type: AccountProperties
Raises: HttpResponseError

Example:

Get properties for the form recognizer account.¶

async with FormTrainingClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
) as form_training_client:
    # First, we see how many custom models we have, and what our limit is
    account_properties = await form_training_client.get_account_properties()
    print("Our account has {} custom models, and we can have at most {} custom models\n".format(
        account_properties.custom_model_count, account_properties.custom_model_limit
    ))

async get_copy_authorization(resource_id: str, resource_region: str, **kwargs: Any) → Dict[str, Union[str, int]][source]¶

Generate authorization for copying a custom model into the target Form Recognizer resource. This should be called by the target resource (where the model will be copied to) and the output can be passed as the target parameter into begin_copy_model().

Parameters

resource_id (str) – Azure Resource Id of the target Form Recognizer resource where the model will be copied to.
resource_region (str) – Location of the target Form Recognizer resource. A valid Azure region name supported by Cognitive Services. For example, ‘westus’, ‘eastus’ etc. See https://azure.microsoft.com/global-infrastructure/services/?products=cognitive-services for the regional availability of Cognitive Services.

Returns

A dictionary with values for the copy authorization - “modelId”, “accessToken”, “resourceId”, “resourceRegion”, and “expirationDateTimeTicks”.

Return type

Dict[str, Union[str, int]]

Raises