本文属于机器翻译版本。若本译文内容与英语原文存在差异,则一律以英文原文为准。
使用 Step Functions 创建和管理亚马逊 SageMaker 任务
学习如何使用 Step Functions 在上创建和管理作业 SageMaker。本页列出了支持的 SageMaker API操作,并提供了创建转 SageMaker 换、训练、标注和处理作业的示例Task
状态。
要了解如何与集成 AWS Step Functions 中的服务,参见集成 服务和。在 Step Functions API 中向服务传递参数
优化 SageMaker 集成的关键特性
-
支持运行作业 (.sync) 集成模式。
-
没有针对请求响应集成模式的优化。
-
不支持等待带有任务令牌的回调集成模式。
支持的 SageMaker APIs
-
-
CreateLabelingJob
-支持.sync
集成模式。 -
CreateProcessingJob
-支持.sync
集成模式。 -
CreateTrainingJob
-支持.sync
集成模式。 -
CreateTransformJob
-支持.sync
集成模式。注意
AWS Step Functions 不会自动为创建策略
CreateTransformJob
。您必须将内联策略附加到创建的角色。想要查询更多的信息, 请参阅此示例IAM策略:CreateTrainingJob. -
SageMaker 转换 Job 示例
以下内容包括创建 Amazon SageMaker 转换任务的Task
状态,其中指定了DataSource
和的 Amazon S3 位置TransformOutput
。
{
"SageMaker CreateTransformJob": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createTransformJob.sync",
"Parameters": {
"ModelName": "SageMakerCreateTransformJobModel-9iFBKsYti9vr",
"TransformInput": {
"CompressionType": "None",
"ContentType": "text/csv",
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": "s3://amzn-s3-demo-source-bucket1/TransformJobDataInput.txt"
}
}
},
"TransformOutput": {
"S3OutputPath": "s3://amzn-s3-demo-source-bucket1/TransformJobOutputPath"
},
"TransformResources": {
"InstanceCount": 1,
"InstanceType": "ml.m4.xlarge"
},
"TransformJobName": "sfn-binary-classification-prediction"
},
"Next": "ValidateOutput"
},
SageMaker 训练 Job 示例
以下内容包括创建 Amazon SageMaker 训练作业的Task
状态。
{
"SageMaker CreateTrainingJob":{
"Type":"Task",
"Resource":"arn:aws:states:::sagemaker:createTrainingJob.sync",
"Parameters":{
"TrainingJobName":"search-model",
"ResourceConfig":{
"InstanceCount":4,
"InstanceType":"ml.c4.8xlarge",
"VolumeSizeInGB":20
},
"HyperParameters":{
"mode":"batch_skipgram",
"epochs":"5",
"min_count":"5",
"sampling_threshold":"0.0001",
"learning_rate":"0.025",
"window_size":"5",
"vector_dim":"300",
"negative_samples":"5",
"batch_size":"11"
},
"AlgorithmSpecification":{
"TrainingImage":"...",
"TrainingInputMode":"File"
},
"OutputDataConfig":{
"S3OutputPath":"s3://amzn-s3-demo-destination-bucket1/doc-search/model"
},
"StoppingCondition":{
"MaxRuntimeInSeconds":100000
},
"RoleArn":"arn:aws:iam::123456789012:role/docsearch-stepfunction-iam-role",
"InputDataConfig":[
{
"ChannelName":"train",
"DataSource":{
"S3DataSource":{
"S3DataType":"S3Prefix",
"S3Uri":"s3://amzn-s3-demo-destination-bucket1/doc-search/interim-data/training-data/",
"S3DataDistributionType":"FullyReplicated"
}
}
}
]
},
"Retry":[
{
"ErrorEquals":[
"SageMaker.AmazonSageMakerException"
],
"IntervalSeconds":1,
"MaxAttempts":100,
"BackoffRate":1.1
},
{
"ErrorEquals":[
"SageMaker.ResourceLimitExceededException"
],
"IntervalSeconds":60,
"MaxAttempts":5000,
"BackoffRate":1
},
{
"ErrorEquals":[
"States.Timeout"
],
"IntervalSeconds":1,
"MaxAttempts":5,
"BackoffRate":1
}
],
"Catch":[
{
"ErrorEquals":[
"States.ALL"
],
"ResultPath":"$.cause",
"Next":"Sagemaker Training Job Error"
}
],
"Next":"Delete Interim Data Job"
}
}
SageMaker 标注 Job 示例
以下内容包括创建 Amazon SageMaker 贴标任务的Task
状态。
{
"StartAt": "SageMaker CreateLabelingJob",
"TimeoutSeconds": 3600,
"States": {
"SageMaker CreateLabelingJob": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createLabelingJob.sync",
"Parameters": {
"HumanTaskConfig": {
"AnnotationConsolidationConfig": {
"AnnotationConsolidationLambdaArn": "arn:aws:lambda:us-west-2:123456789012:function:ACS-TextMultiClass"
},
"NumberOfHumanWorkersPerDataObject": 1,
"PreHumanTaskLambdaArn": "arn:aws:lambda:us-west-2:123456789012:function:PRE-TextMultiClass",
"TaskDescription": "Classify the following text",
"TaskKeywords": [
"tc",
"Labeling"
],
"TaskTimeLimitInSeconds": 300,
"TaskTitle": "Classify short bits of text",
"UiConfig": {
"UiTemplateS3Uri": "s3://amzn-s3-demo-bucket/TextClassification.template"
},
"WorkteamArn": "arn:aws:sagemaker:us-west-2:123456789012:workteam/private-crowd/ExampleTesting"
},
"InputConfig": {
"DataAttributes": {
"ContentClassifiers": [
"FreeOfPersonallyIdentifiableInformation",
"FreeOfAdultContent"
]
},
"DataSource": {
"S3DataSource": {
"ManifestS3Uri": "s3://amzn-s3-demo-bucket/manifest.json"
}
}
},
"LabelAttributeName": "Categories",
"LabelCategoryConfigS3Uri": "s3://amzn-s3-demo-bucket/labelcategories.json",
"LabelingJobName": "example-job-name",
"OutputConfig": {
"S3OutputPath": "s3://amzn-s3-demo-bucket/output"
},
"RoleArn": "arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole",
"StoppingConditions": {
"MaxHumanLabeledObjectCount": 10000,
"MaxPercentageOfInputDatasetLabeled": 100
}
},
"Next": "ValidateOutput"
},
"ValidateOutput": {
"Type": "Choice",
"Choices": [
{
"Not": {
"Variable": "$.LabelingJobArn",
"StringEquals": ""
},
"Next": "Succeed"
}
],
"Default": "Fail"
},
"Succeed": {
"Type": "Succeed"
},
"Fail": {
"Type": "Fail",
"Error": "InvalidOutput",
"Cause": "Output is not what was expected. This could be due to a service outage or a misconfigured service integration."
}
}
}
SageMaker 处理 Job 示例
以下内容包括创建 Amazon SageMaker 处理任务的Task
状态。
{
"StartAt": "SageMaker CreateProcessingJob Sync",
"TimeoutSeconds": 3600,
"States": {
"SageMaker CreateProcessingJob Sync": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createProcessingJob.sync",
"Parameters": {
"AppSpecification": {
"ImageUri": "737474898029.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3"
},
"ProcessingResources": {
"ClusterConfig": {
"InstanceCount": 1,
"InstanceType": "ml.t3.medium",
"VolumeSizeInGB": 10
}
},
"RoleArn": "arn:aws:iam::123456789012:role/SM-003-CreateProcessingJobAPIExecutionRole",
"ProcessingJobName.$": "$.id"
},
"Next": "ValidateOutput"
},
"ValidateOutput": {
"Type": "Choice",
"Choices": [
{
"Not": {
"Variable": "$.ProcessingJobArn",
"StringEquals": ""
},
"Next": "Succeed"
}
],
"Default": "Fail"
},
"Succeed": {
"Type": "Succeed"
},
"Fail": {
"Type": "Fail",
"Error": "InvalidConnectorOutput",
"Cause": "Connector output is not what was expected. This could be due to a service outage or a misconfigured connector."
}
}
}
IAM致电 Amazon 的政策 SageMaker
以下示例模板演示了如何操作 AWS Step Functions 根据状态机定义中的资源生成IAM策略。有关更多信息,请参阅Step Functions 如何为集成服务生成IAM策略 和在 Step Functions 中探索服务集成模式。
注意
对于这些示例,
请参阅角色的 Amazon 资源名称 (ARN),该IAM SageMaker 角色用于访问模型工件和 docker 镜像,以便在 ML 计算实例上部署,或用于批量转换任务。有关更多信息,请参阅 Amazon SageMaker 角色。[[roleArn]]
CreateTrainingJob
静态资源
动态资源
CreateTransformJob
注意
AWS Step Functions 当您创建与集成的状态机CreateTransformJob
时,不会自动创建策略 SageMaker。您必须根据以下IAM示例之一将内联策略附加到已创建的角色。
静态资源
动态资源