本文属于机器翻译版本。若本译文内容与英语原文存在差异,则一律以英文原文为准。
以下程序是在 Python 程序中使用 Amazon Kendra 的示例。此程序执行以下操作:
- 使用 CreateIndex 操作创建新索引。
- 等待索引创建完成。程序使用 DescribeIndex 操作来监视索引的状态。
- 索引处于活动状态后,使用 CreateDataSource 操作创建数据源。
- 等待数据源创建完成。程序使用 DescribeDataSource 操作来监视数据源的状态。
- 数据源处于活动状态后,使用 StartDataSourceSyncJob 操作将索引与数据源的内容同步。
import boto3
from botocore.exceptions import ClientError
import pprint
import time
# Getting-started script for Amazon Kendra:
#   1. create an index and wait until it leaves the CREATING state,
#   2. create an S3 data source on that index and wait the same way,
#   3. start a data source sync job and wait until it stops syncing.
# Any botocore ClientError raised along the way is printed and ends the run.
kendra = boto3.client("kendra")

# Seconds to pause between two consecutive status polls.
POLL_INTERVAL_SECONDS = 60

print("Create an index.")

# Provide a name for the index
index_name = "python-getting-started-index"
# Provide an optional description for the index
description = "Getting started index"
# Provide the IAM role ARN required for indexes
index_role_arn = "arn:aws:iam::${accountId}:role/KendraRoleForGettingStartedIndex"

try:
    index_response = kendra.create_index(
        Description=description,
        Name=index_name,
        RoleArn=index_role_arn,
    )

    pprint.pprint(index_response)

    index_id = index_response["Id"]

    print("Wait for Amazon Kendra to create the index.")

    while True:
        # Get the details of the index, such as the status
        index_description = kendra.describe_index(Id=index_id)
        status = index_description["Status"]
        print(" Creating index. Status: "+status)
        # Check before sleeping so the script does not idle for a full
        # interval after the index has already left the CREATING state.
        if status != "CREATING":
            break
        time.sleep(POLL_INTERVAL_SECONDS)

    # NOTE(review): the loop above also exits on a non-ACTIVE terminal status;
    # the data source creation below is attempted regardless. Presumably the
    # service then rejects the call with a ClientError - confirm.

    print("Create an S3 data source.")

    # Provide a name for the data source
    data_source_name = "python-getting-started-data-source"
    # Provide an optional description for the data source
    data_source_description = "Getting started data source."
    # Provide the IAM role ARN required for data sources
    data_source_role_arn = "arn:aws:iam::${accountId}:role/KendraRoleForGettingStartedDataSource"
    # Provide the data source connection information
    S3_bucket_name = "S3-bucket-name"
    data_source_type = "S3"
    # Configure the data source
    configuration = {
        "S3Configuration": {
            "BucketName": S3_bucket_name,
        },
    }

    # If you connect to your data source using a template schema,
    # configure the template schema instead:
    #
    # configuration = {"TemplateConfiguration":
    #     {
    #         "Template": {JSON schema}
    #     }
    # }

    data_source_response = kendra.create_data_source(
        Name=data_source_name,
        # Pass the description text, not the data source name.
        Description=data_source_description,
        RoleArn=data_source_role_arn,
        Type=data_source_type,
        Configuration=configuration,
        IndexId=index_id,
    )

    pprint.pprint(data_source_response)

    data_source_id = data_source_response["Id"]

    print("Wait for Amazon Kendra to create the data source.")

    while True:
        # Get the details of the data source, such as the status
        data_source_description = kendra.describe_data_source(
            Id=data_source_id,
            IndexId=index_id,
        )
        status = data_source_description["Status"]
        print(" Creating data source. Status: "+status)
        # If status is not CREATING, then quit (checked before sleeping)
        if status != "CREATING":
            break
        time.sleep(POLL_INTERVAL_SECONDS)

    print("Synchronize the data source.")

    sync_response = kendra.start_data_source_sync_job(
        Id=data_source_id,
        IndexId=index_id,
    )

    pprint.pprint(sync_response)

    print("Wait for the data source to sync with the index.")

    while True:
        jobs = kendra.list_data_source_sync_jobs(
            Id=data_source_id,
            IndexId=index_id,
        )

        # For this example, there should be one job
        status = jobs["History"][0]["Status"]
        print(" Syncing data source. Status: "+status)
        # Per the Kendra API reference, SYNCING_INDEXING is also an
        # in-progress state, so keep waiting through it as well.
        if status not in ("SYNCING", "SYNCING_INDEXING"):
            break
        time.sleep(POLL_INTERVAL_SECONDS)

except ClientError as e:
    print("%s" % e)

print("Program ends.")