

本文属于机器翻译版本。若本译文内容与英语原文存在差异，则一律以英文原文为准。

# 使用 Amazon EC2 Auto Scaling 组创建队列基础设施
<a name="create-auto-scaling"></a>

本节介绍如何创建 Amazon EC2 Auto Scaling 队列。

使用下面的 CloudFormation YAML 模板创建一个 Amazon EC2 Auto Scaling (Auto Scaling) 组、一个包含两个子网、一个实例配置文件和一个实例访问角色的亚马逊虚拟私有云 (Amazon VPC)。这些是在子网中使用 Auto Scaling 启动实例所必需的。

您应该查看并更新实例类型列表以满足您的渲染需求。

有关 CloudFormation YAML 模板中使用的资源和参数的完整说明，请参阅《*AWS CloudFormation 用户指南》*中的 Deadl [ine Cloud 资源类型参考](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/AWS_Deadline.html)。

**创建 Amazon EC2 Auto Scaling 队列**

1. 使用以下示例创建定义`FarmID``FleetID`、和`AMIId`参数的 CloudFormation 模板。将模板保存到本地计算机上的`.YAML`文件中。

   ```
   AWSTemplateFormatVersion: 2010-09-09
   Description: Amazon Deadline Cloud customer-managed fleet
   Parameters:
     FarmId:
       Type: String
       Description: Farm ID
     FleetId:
       Type: String
       Description: Fleet ID
     AMIId:
       Type: String
       Description: AMI ID for launching workers
   Resources:
     deadlineVPC:
       Type: 'AWS::EC2::VPC'
       Properties:
         CidrBlock: 100.100.0.0/16
     deadlineWorkerSecurityGroup:
       Type: 'AWS::EC2::SecurityGroup'
       Properties:
         GroupDescription: !Join
           - ' '
           - - Security group created for Deadline Cloud workers in the fleet
             - !Ref FleetId
         GroupName: !Join
           - ''
           - - deadlineWorkerSecurityGroup-
             - !Ref FleetId
         SecurityGroupEgress:
           - CidrIp: 0.0.0.0/0
             IpProtocol: '-1'
         SecurityGroupIngress: []
         VpcId: !Ref deadlineVPC
     deadlineIGW:
       Type: 'AWS::EC2::InternetGateway'
       Properties: {}
     deadlineVPCGatewayAttachment:
       Type: 'AWS::EC2::VPCGatewayAttachment'
       Properties:
         VpcId: !Ref deadlineVPC
         InternetGatewayId: !Ref deadlineIGW
     deadlinePublicRouteTable:
       Type: 'AWS::EC2::RouteTable'
       Properties:
         VpcId: !Ref deadlineVPC
     deadlinePublicRoute:
       Type: 'AWS::EC2::Route'
       Properties:
         RouteTableId: !Ref deadlinePublicRouteTable
         DestinationCidrBlock: 0.0.0.0/0
         GatewayId: !Ref deadlineIGW
       DependsOn:
         - deadlineIGW
         - deadlineVPCGatewayAttachment
     deadlinePublicSubnet0:
       Type: 'AWS::EC2::Subnet'
       Properties:
         VpcId: !Ref deadlineVPC
         CidrBlock: 100.100.16.0/22
         AvailabilityZone: !Join
           - ''
           - - !Ref 'AWS::Region'
             - a
     deadlineSubnetRouteTableAssociation0:
       Type: 'AWS::EC2::SubnetRouteTableAssociation'
       Properties:
         RouteTableId: !Ref deadlinePublicRouteTable
         SubnetId: !Ref deadlinePublicSubnet0
     deadlinePublicSubnet1:
       Type: 'AWS::EC2::Subnet'
       Properties:
         VpcId: !Ref deadlineVPC
         CidrBlock: 100.100.20.0/22
         AvailabilityZone: !Join
           - ''
           - - !Ref 'AWS::Region'
             - c
     deadlineSubnetRouteTableAssociation1:
       Type: 'AWS::EC2::SubnetRouteTableAssociation'
       Properties:
         RouteTableId: !Ref deadlinePublicRouteTable
         SubnetId: !Ref deadlinePublicSubnet1
     deadlineInstanceAccessAccessRole:
       Type: 'AWS::IAM::Role'
       Properties:
         RoleName: !Join
           - '-'
           - - deadline
             - InstanceAccess
             - !Ref FleetId
         AssumeRolePolicyDocument:
           Statement:
             - Effect: Allow
               Principal:
                 Service: ec2.amazonaws.com
               Action:
                 - 'sts:AssumeRole'
         Path: /     
         ManagedPolicyArns:
           - 'arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy'
           - 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore'
           - 'arn:aws:iam::aws:policy/AWSDeadlineCloud-WorkerHost'  
     deadlineInstanceProfile:
       Type: 'AWS::IAM::InstanceProfile'
       Properties:
         Path: /
         Roles:
           - !Ref deadlineInstanceAccessAccessRole
     deadlineLaunchTemplate:
       Type: 'AWS::EC2::LaunchTemplate'
       Properties:
         LaunchTemplateName: !Join
           - ''
           - - deadline-LT-
             - !Ref FleetId
         LaunchTemplateData:
           NetworkInterfaces:
             - DeviceIndex: 0
               AssociatePublicIpAddress: true
               Groups:
                 - !Ref deadlineWorkerSecurityGroup
               DeleteOnTermination: true
           ImageId: !Ref AMIId
           InstanceInitiatedShutdownBehavior: terminate
           IamInstanceProfile:
             Arn: !GetAtt
               - deadlineInstanceProfile
               - Arn
           MetadataOptions:
             HttpTokens: required
             HttpEndpoint: enabled
   
     deadlineAutoScalingGroup:
       Type: 'AWS::AutoScaling::AutoScalingGroup'
       Properties:
         AutoScalingGroupName: !Join
           - ''
           - - deadline-ASG-autoscalable-
             - !Ref FleetId
         MinSize: 0
         MaxSize: 10
         VPCZoneIdentifier:
           - !Ref deadlinePublicSubnet0
           - !Ref deadlinePublicSubnet1
         NewInstancesProtectedFromScaleIn: true
         MixedInstancesPolicy:
           InstancesDistribution:
             OnDemandBaseCapacity: 0
             OnDemandPercentageAboveBaseCapacity: 0
             SpotAllocationStrategy: capacity-optimized
             OnDemandAllocationStrategy: lowest-price
           LaunchTemplate:
             LaunchTemplateSpecification:
               LaunchTemplateId: !Ref deadlineLaunchTemplate
               Version: !GetAtt
                 - deadlineLaunchTemplate
                 - LatestVersionNumber
             Overrides:
               - InstanceType: m5.large
               - InstanceType: m5d.large
               - InstanceType: m5a.large
               - InstanceType: m5ad.large
               - InstanceType: m5n.large
               - InstanceType: m5dn.large
               - InstanceType: m4.large
               - InstanceType: m3.large
               - InstanceType: r5.large
               - InstanceType: r5d.large
               - InstanceType: r5a.large
               - InstanceType: r5ad.large
               - InstanceType: r5n.large
               - InstanceType: r5dn.large
               - InstanceType: r4.large
         MetricsCollection:
           - Granularity: 1Minute
             Metrics:
               - GroupMinSize
               - GroupMaxSize
               - GroupDesiredCapacity
               - GroupInServiceInstances
               - GroupTotalInstances
               - GroupInServiceCapacity
               - GroupTotalCapacity
   ```

1. 在 [https://console.aws.amazon.com/cloudformat](https://console.aws.amazon.com/cloudformation/) ion 上打开 CloudFormation 控制台。

   使用 CloudFormation 控制台按照上传您创建的模板文件的说明创建堆栈。有关更多信息，请参阅《*AWS CloudFormation 用户指南》*中的[在 CloudFormation 控制台上创建堆栈](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-console-create-stack.html)。

**注意**  
附加到您的工作人员的 Amazon EC2 实例的 IAM 角色的证书可用于在该工作程序上运行*的所有*进程，包括作业。工作人员应拥有最少的操作权限：`deadline:CreateWorker`和 `deadline:AssumeFleetRoleForWorker.`
工作器代理获取队列角色的凭证，并对其进行配置以供运行作业使用。Amazon EC2 实例配置文件角色不应包含您的任务所需的权限。

## 使用 Deadline Cloud 规模推荐功能自动扩展您的 Amazon EC2 机群
<a name="autoscale-ec2-fleet"></a>

Deadline Cloud 利用亚马逊 EC2 Auto Scaling（Auto Scaling）组自动扩展亚马逊 EC2 客户管理的队列 (CMF)。您需要配置舰队模式并在您的账户中部署所需的基础架构，以实现队列自动扩展。您部署的基础架构将适用于所有舰队，因此您只需要设置一次即可。

基本工作流程是：将舰队模式配置为 auto scale，然后，每当建议的舰队规模 EventBridge 发生变化时，Deadline Cloud 就会为该舰队发送一个事件（一个事件包含舰队 ID、推荐的舰队规模和其他元数据）。您将有一 EventBridge 条规则来筛选相关事件，并使用 Lambda 来使用它们。Lambda 将与亚马逊 EC2 Auto Scaling 集成`AutoScalingGroup`，以自动扩展亚马逊 EC2 队列。

### 将舰队模式设置为 `EVENT_BASED_AUTO_SCALING`
<a name="set-fleet-mode"></a>

将您的舰队模式配置为`EVENT_BASED_AUTO_SCALING`. 您可以使用控制台来执行此操作，也可以使用直接调 AWS CLI 用`CreateFleet`或 `UpdateFleet` API。配置模式后，每当建议的队列规模发生变化时，Deadline Cloud 就会开始发送 EventBridge事件。
+ `UpdateFleet`命令示例：

  ```
  aws deadline update-fleet \
    --farm-id FARM_ID \
    --fleet-id FLEET_ID \
    --configuration file://configuration.json
  ```
+ `CreateFleet`命令示例：

  ```
  aws deadline create-fleet \
    --farm-id FARM_ID \
    --display-name "Fleet name" \
    --max-worker-count 10 \
    --configuration file://configuration.json
  ```

以下是在上述 CLI 命令中`configuration.json`使用的示例（`--configuration file://configuration.json`）。
+ 要在队列上启用 Auto Scaling，应将模式设置为`EVENT_BASED_AUTO_SCALING`。
+ `workerCapabilities`这些是您创建 CMF 时分配给它的默认值。如果您需要增加 CMF 的可用资源，可以更改这些值。

 配置队列模式后，Deadline Cloud 开始为该队列发出舰队规模建议事件。

```
{
    "customerManaged": {
        "mode": "EVENT_BASED_AUTO_SCALING",       
        "workerCapabilities": {
            "vCpuCount": {
                "min": 1,
                "max": 4
            },
            "memoryMiB": {
                "min": 1024,
                "max": 4096
            },
            "osFamily": "linux",
            "cpuArchitectureType": "x86_64"
        }
    }
}
```

#### 使用 CloudFormation 模板部署 Auto Scaling 堆栈
<a name="deploy-stack"></a>

您可以设置 EventBridge 规则来筛选事件，设置用于使用事件和控制 Auto Scaling 的 Lambda，以及用于存储未处理事件的 SQS 队列。使用以下 CloudFormation 模板部署堆栈中的所有内容。成功部署资源后，您可以提交任务，队列将自动扩展。

```
Resources:
  AutoScalingLambda:
    Type: 'AWS::Lambda::Function'
    Properties:
      Code:
        ZipFile: |-
          """
          This lambda is configured to handle "Fleet Size Recommendation Change"
          messages. It will handle all such events, and requires
          that the ASG is named based on the fleet id. It will scale up/down the fleet
          based on the recommended fleet size in the message.
          
          Example EventBridge message:
          {
              "version": "0",
              "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718",
              "detail-type": "Fleet Size Recommendation Change",
              "source": "aws.deadline",
              "account": "111122223333",
              "time": "2017-12-22T18:43:48Z",
              "region": "us-west-1",
              "resources": [],
              "detail": {
                  "farmId": "farm-12345678900000000000000000000000",
                  "fleetId": "fleet-12345678900000000000000000000000",
                  "oldFleetSize": 1,
                  "newFleetSize": 5,
              }
          }
          """
          
          import json
          import boto3
          import logging

          logger = logging.getLogger()
          logger.setLevel(logging.INFO)

          auto_scaling_client = boto3.client("autoscaling")

          def lambda_handler(event, context):
              logger.info(event)
              event_detail = event["detail"]
              fleet_id = event_detail["fleetId"]
              desired_capacity = event_detail["newFleetSize"]

              asg_name = f"deadline-ASG-autoscalable-{fleet_id}"
              auto_scaling_client.set_desired_capacity(
                  AutoScalingGroupName=asg_name,
                  DesiredCapacity=desired_capacity,
                  HonorCooldown=False,
              )

              return {
                  'statusCode': 200,
                  'body': json.dumps(f'Successfully set desired_capacity for {asg_name} to {desired_capacity}')
              }
      Handler: index.lambda_handler
      Role: !GetAtt 
        - AutoScalingLambdaServiceRole
        - Arn
      Runtime: python3.11
    DependsOn:
      - AutoScalingLambdaServiceRoleDefaultPolicy
      - AutoScalingLambdaServiceRole
  AutoScalingEventRule:
    Type: 'AWS::Events::Rule'
    Properties:
      EventPattern:
        source:
          - aws.deadline
        detail-type:
          - Fleet Size Recommendation Change
      State: ENABLED
      Targets:
        - Arn: !GetAtt 
            - AutoScalingLambda
            - Arn
          DeadLetterConfig:
            Arn: !GetAtt 
              - UnprocessedAutoScalingEventQueue
              - Arn
          Id: Target0
          RetryPolicy:
            MaximumRetryAttempts: 15
  AutoScalingEventRuleTargetPermission:
    Type: 'AWS::Lambda::Permission'
    Properties:
      Action: 'lambda:InvokeFunction'
      FunctionName: !GetAtt 
        - AutoScalingLambda
        - Arn
      Principal: events.amazonaws.com
      SourceArn: !GetAtt 
        - AutoScalingEventRule
        - Arn
  AutoScalingLambdaServiceRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Statement:
          - Action: 'sts:AssumeRole'
            Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
        Version: 2012-10-17
      ManagedPolicyArns:
        - !Join 
          - ''
          - - 'arn:'
            - !Ref 'AWS::Partition'
            - ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
  AutoScalingLambdaServiceRoleDefaultPolicy:
    Type: 'AWS::IAM::Policy'
    Properties:
      PolicyDocument:
        Statement:
          - Action: 'autoscaling:SetDesiredCapacity'
            Effect: Allow
            Resource: '*'
        Version: 2012-10-17
      PolicyName: AutoScalingLambdaServiceRoleDefaultPolicy
      Roles:
        - !Ref AutoScalingLambdaServiceRole
  UnprocessedAutoScalingEventQueue:
    Type: 'AWS::SQS::Queue'
    Properties:
      QueueName: deadline-unprocessed-autoscaling-events
    UpdateReplacePolicy: Delete
    DeletionPolicy: Delete
  UnprocessedAutoScalingEventQueuePolicy:
    Type: 'AWS::SQS::QueuePolicy'
    Properties:
      PolicyDocument:
        Statement:
          - Action: 'sqs:SendMessage'
            Condition:
              ArnEquals:
                'aws:SourceArn': !GetAtt 
                  - AutoScalingEventRule
                  - Arn
            Effect: Allow
            Principal:
              Service: events.amazonaws.com
            Resource: !GetAtt 
              - UnprocessedAutoScalingEventQueue
              - Arn
        Version: 2012-10-17
      Queues:
        - !Ref UnprocessedAutoScalingEventQueue
```

## 执行舰队运行状况检查
<a name="fleet-health-check"></a>

创建队列后，您应构建自定义运行状况检查，以确保您的队列保持健康且没有停滞的实例，从而帮助避免不必要的成本。请参阅[部署 Deadline Cloud 舰队运行状况检查](https://github.com/aws-deadline/deadline-cloud-samples/tree/mainline/cloudformation/farm_templates/cmf_templates) GitHub。运行状况检查可以降低在未被发现的情况下意外更改您的Amazon Machine Image启动模板或网络配置的风险。