There are more AWS SDK examples available in the AWS Doc SDK Examples GitHub repo.
Getting started with Amazon SageMaker Feature Store
The following code example shows how to:
Set up IAM permissions
Create a SageMaker execution role
Create feature groups
Clean up resources
- Bash
-
- AWS CLI with Bash script
-
Note
There's more on GitHub. Find the complete example and learn how to set up and run in the Sample developer tutorials
repository.

#!/bin/bash
#
# Amazon SageMaker Feature Store Tutorial Script - Version 3
#
# Demonstrates how to use Amazon SageMaker Feature Store with the AWS CLI:
# resolves (or creates) a SageMaker execution role, creates an S3 bucket for
# the offline store and two feature groups, ingests and reads back sample
# records, and finally offers to delete everything the script created.
#
# Usage: <script> [ROLE_ARN] [y|n]
#   ROLE_ARN  ARN of an existing execution role. When empty, or when the role
#             cannot be looked up, a new role is created.
#   y|n       "y" cleans up automatically, "n" keeps the resources; any other
#             value (or none) asks interactively at the end.
#
# Requires: aws (AWS CLI), openssl.

# Setup logging: everything written to stdout/stderr is also appended to the log.
LOG_FILE="sagemaker-featurestore-tutorial.log"
exec > >(tee -a "$LOG_FILE") 2>&1

echo "Starting SageMaker Feature Store tutorial script at $(date)"
echo "All commands and outputs will be logged to $LOG_FILE"
echo ""

# Resources created so far, as "Type:name" entries, deleted in reverse order.
CREATED_RESOURCES=()

# Name of the IAM role created by this run (empty when an existing role is used).
CREATED_ROLE_NAME=""

# Combined stdout/stderr of the most recent run_step call.
STEP_OUTPUT=""

# Managed policies attached to (and later detached from) a role we create.
readonly ROLE_POLICY_ARNS=(
  "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess"
  "arn:aws:iam::aws:policy/AmazonS3FullAccess"
)

#######################################
# Report a fatal error, delete everything created so far, and exit 1.
# Must be called from the main shell: inside $(...) the exit would only
# leave the subshell.
# Arguments: $1 - error message
#######################################
handle_error() {
  echo "ERROR: $1"
  echo "Attempting to clean up resources..."
  cleanup_resources
  exit 1
}

#######################################
# Run a command, keeping its combined output in STEP_OUTPUT.
# Success is decided by the exit status, not by searching the output for the
# word "error" (successful responses may contain it, e.g. the "Errors" key
# returned by batch-get-record).
# Arguments: $1 - what the step does, used as "Failed to <$1>"
#            $2.. - the command and its arguments
#######################################
run_step() {
  local description=$1
  shift
  if ! STEP_OUTPUT=$("$@" 2>&1); then
    handle_error "Failed to ${description}: ${STEP_OUTPUT}"
  fi
}

#######################################
# Poll a feature group every 5 seconds until it reaches status "Created".
# Any status other than "Creating"/"Created" (for example "CreateFailed"),
# a failing describe call, or running out of attempts aborts the script.
# Arguments: $1 - feature group name
#            $2 - maximum number of polls (optional, default 120)
#######################################
wait_for_feature_group() {
  local feature_group_name=$1
  local max_attempts=${2:-120}
  local status=""
  local attempt

  echo "Waiting for feature group ${feature_group_name} to be created..."
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    sleep 5
    if ! status=$(aws sagemaker describe-feature-group \
      --feature-group-name "${feature_group_name}" \
      --query 'FeatureGroupStatus' \
      --output text); then
      handle_error "Could not describe feature group ${feature_group_name}"
    fi
    echo "Current status: ${status}"
    case "$status" in
      Created)
        echo "Feature group ${feature_group_name} is now ${status}"
        return 0
        ;;
      Creating)
        ;;
      *)
        handle_error "Feature group ${feature_group_name} creation failed (status: ${status})"
        ;;
    esac
  done
  handle_error "Timed out waiting for feature group ${feature_group_name} to be created"
}

#######################################
# Detach the tutorial policies from an IAM role and delete the role.
# Best effort: problems are reported as warnings, never fatal.
# Arguments: $1 - role name
#######################################
delete_iam_role() {
  local role_name=$1
  local policy_arn

  echo "Detaching policies from role: $role_name"
  for policy_arn in "${ROLE_POLICY_ARNS[@]}"; do
    # stderr is discarded on purpose: the policy may never have been attached.
    aws iam detach-role-policy \
      --role-name "$role_name" \
      --policy-arn "$policy_arn" 2>/dev/null
  done
  echo "Deleting IAM role: $role_name"
  aws iam delete-role --role-name "$role_name" \
    || echo "WARNING: could not delete IAM role $role_name; delete it manually"
}

#######################################
# Delete every entry of CREATED_RESOURCES, most recently created first.
# A failing deletion prints a warning and the loop continues.
#######################################
cleanup_resources() {
  local i resource resource_type resource_name

  echo "Cleaning up resources..."
  for ((i = ${#CREATED_RESOURCES[@]} - 1; i >= 0; i--)); do
    resource="${CREATED_RESOURCES[$i]}"
    resource_type=${resource%%:*}
    resource_name=${resource#*:}
    echo "Deleting $resource_type: $resource_name"
    case "$resource_type" in
      "FeatureGroup")
        aws sagemaker delete-feature-group --feature-group-name "$resource_name" \
          || echo "WARNING: could not delete feature group $resource_name"
        ;;
      "S3Bucket")
        echo "Emptying S3 bucket: $resource_name"
        aws s3 rm "s3://$resource_name" --recursive
        echo "Deleting S3 bucket: $resource_name"
        aws s3api delete-bucket --bucket "$resource_name" \
          || echo "WARNING: could not delete S3 bucket $resource_name"
        ;;
      "IAMRole")
        delete_iam_role "$resource_name"
        ;;
      *)
        echo "Unknown resource type: $resource_type"
        ;;
    esac
  done
}

#######################################
# Create a SageMaker execution role and attach the tutorial policies.
# Designed to be called as role_arn=$(create_sagemaker_role): that runs in a
# subshell, so this function must not rely on modifying globals or on
# handle_error. The CALLER registers the role in CREATED_RESOURCES.
# Outputs: the role ARN on stdout; all progress/diagnostics on stderr.
# Returns: 0 on success; 1 on failure (a partially set up role is removed).
#######################################
create_sagemaker_role() {
  local suffix role_name role_arn policy_arn

  if ! suffix=$(openssl rand -hex 4); then
    echo "ERROR: could not generate a random role suffix (is openssl installed?)" >&2
    return 1
  fi
  role_name="SageMakerFeatureStoreRole-${suffix}"
  echo "Creating SageMaker execution role: $role_name" >&2

  # Trust policy letting the SageMaker service assume the role.
  local trust_policy='{
    "Version": "2012-10-17",
    "Statement": [
      {
        "Effect": "Allow",
        "Principal": { "Service": "sagemaker.amazonaws.com" },
        "Action": "sts:AssumeRole"
      }
    ]
  }'

  # Only stdout is captured, so role_arn holds just the ARN; CLI error text
  # goes to stderr (and therefore to the log).
  if ! role_arn=$(aws iam create-role \
    --role-name "$role_name" \
    --assume-role-policy-document "$trust_policy" \
    --description "SageMaker execution role for Feature Store tutorial" \
    --query 'Role.Arn' \
    --output text); then
    echo "ERROR: Failed to create IAM role $role_name" >&2
    return 1
  fi
  echo "Role created successfully" >&2

  echo "Attaching policies to role..." >&2
  for policy_arn in "${ROLE_POLICY_ARNS[@]}"; do
    if ! aws iam attach-role-policy \
      --role-name "$role_name" \
      --policy-arn "$policy_arn" >&2; then
      echo "ERROR: Failed to attach policy $policy_arn" >&2
      # The caller never learns the role name on failure, so roll back here.
      delete_iam_role "$role_name" >&2
      return 1
    fi
  done

  echo "Role ARN: $role_arn" >&2
  echo "Waiting 10 seconds for role to propagate..." >&2
  sleep 10

  # Return only the role ARN on stdout.
  echo "$role_arn"
}

#######################################
# Create one feature group (online + offline store) and register it for cleanup.
# Globals:   OFFLINE_STORE_CONFIG, ROLE_ARN (read); CREATED_RESOURCES (written)
# Arguments: $1 - label used in messages ("customers", "orders")
#            $2 - feature group name
#            $3 - feature definitions (JSON array)
#######################################
create_feature_group() {
  local label=$1
  local name=$2
  local definitions=$3

  echo "Creating ${label} feature group: ${name}"
  run_step "create ${label} feature group" \
    aws sagemaker create-feature-group \
    --feature-group-name "$name" \
    --record-identifier-feature-name "customer_id" \
    --event-time-feature-name "EventTime" \
    --feature-definitions "$definitions" \
    --online-store-config '{"EnableOnlineStore": true}' \
    --offline-store-config "$OFFLINE_STORE_CONFIG" \
    --role-arn "$ROLE_ARN"
  echo "$STEP_OUTPUT"
  CREATED_RESOURCES+=("FeatureGroup:${name}")
}

#######################################
# Write one record to a feature group and print the CLI response.
# Arguments: $1 - label used in messages ("customer 1", "order 2", ...)
#            $2 - feature group name
#            $3 - record (JSON array of FeatureName/ValueAsString pairs)
#######################################
put_record() {
  local label=$1
  local group=$2
  local record=$3

  run_step "ingest ${label} data" \
    aws sagemaker-featurestore-runtime put-record \
    --feature-group-name "$group" \
    --record "$record"
  echo "$STEP_OUTPUT"
}

#######################################
# List what has to be deleted by hand when cleanup is skipped.
#######################################
print_manual_cleanup_hint() {
  echo "To clean up later, delete the following resources:"
  echo "- Feature Groups: $CUSTOMERS_FEATURE_GROUP_NAME, $ORDERS_FEATURE_GROUP_NAME"
  echo "- S3 Bucket: $S3_BUCKET_NAME"
  if [ -n "$CREATED_ROLE_NAME" ]; then
    echo "- IAM Role: $CREATED_ROLE_NAME"
  fi
  echo ""
  # Single quotes: "$0" must stay literal instead of expanding to the script name.
  echo 'Estimated ongoing cost: ~$0.01 per month for online store'
}

# Handle cleanup option
AUTO_CLEANUP="${2:-}"

# Resolve the region once and export it, so that the bucket and every
# SageMaker call use the same region (also when falling back to us-east-1).
REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-}}"
if [ -z "$REGION" ]; then
  REGION=$(aws configure get region)
fi
if [ -z "$REGION" ]; then
  REGION="us-east-1"
  echo "No default region configured, using: $REGION"
else
  echo "Using region: $REGION"
fi
export AWS_DEFAULT_REGION="$REGION"

# Handle SageMaker execution role
ROLE_ARN="${1:-}"
NEED_NEW_ROLE="yes"
if [ -z "$ROLE_ARN" ]; then
  echo "Creating SageMaker execution role automatically..."
else
  # Validate the role ARN by looking the role up by name.
  ROLE_NAME=${ROLE_ARN##*role/}
  if aws iam get-role --role-name "$ROLE_NAME" > /dev/null 2>&1; then
    NEED_NEW_ROLE="no"
  else
    echo "Role $ROLE_NAME could not be found."
    echo "Creating a new role automatically..."
  fi
fi

if [ "$NEED_NEW_ROLE" = "yes" ]; then
  if ! ROLE_ARN=$(create_sagemaker_role) || [ -z "$ROLE_ARN" ]; then
    handle_error "Failed to create SageMaker execution role"
  fi
  # Registered here, in the main shell, so cleanup and the summary see it.
  CREATED_ROLE_NAME=${ROLE_ARN##*/}
  CREATED_RESOURCES+=("IAMRole:$CREATED_ROLE_NAME")
fi

# Generate a random identifier for resource names
if ! RANDOM_ID=$(openssl rand -hex 4); then
  handle_error "Failed to generate a random identifier (is openssl installed?)"
fi
echo "Using random identifier: $RANDOM_ID"

# Set variables
if ! ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text); then
  handle_error "Failed to determine the AWS account ID"
fi
echo "Account ID: $ACCOUNT_ID"

S3_BUCKET_NAME="sagemaker-featurestore-${RANDOM_ID}-${ACCOUNT_ID}"
PREFIX="featurestore-tutorial"
CURRENT_TIME=$(date +%s)

echo "Creating S3 bucket: $S3_BUCKET_NAME"

# Create bucket in current region (us-east-1 must not get a LocationConstraint)
BUCKET_ARGS=(--bucket "$S3_BUCKET_NAME" --region "$REGION")
if [ "$REGION" != "us-east-1" ]; then
  BUCKET_ARGS+=(--create-bucket-configuration "LocationConstraint=$REGION")
fi
run_step "create S3 bucket" aws s3api create-bucket "${BUCKET_ARGS[@]}"
echo "$STEP_OUTPUT"
CREATED_RESOURCES+=("S3Bucket:$S3_BUCKET_NAME")

# Block public access to the bucket
run_step "block public access to S3 bucket" \
  aws s3api put-public-access-block \
  --bucket "$S3_BUCKET_NAME" \
  --public-access-block-configuration "BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true"

# Create feature groups
echo "Creating feature groups..."

OFFLINE_STORE_CONFIG=$(printf \
  '{"S3StorageConfig": {"S3Uri": "s3://%s/%s"}, "DisableGlueTableCreation": false}' \
  "$S3_BUCKET_NAME" "$PREFIX")

# Create customers feature group
CUSTOMERS_FEATURE_GROUP_NAME="customers-feature-group-${RANDOM_ID}"
create_feature_group "customers" "$CUSTOMERS_FEATURE_GROUP_NAME" '[
  {"FeatureName": "customer_id", "FeatureType": "Integral"},
  {"FeatureName": "name", "FeatureType": "String"},
  {"FeatureName": "age", "FeatureType": "Integral"},
  {"FeatureName": "address", "FeatureType": "String"},
  {"FeatureName": "membership_type", "FeatureType": "String"},
  {"FeatureName": "EventTime", "FeatureType": "Fractional"}
]'

# Create orders feature group
ORDERS_FEATURE_GROUP_NAME="orders-feature-group-${RANDOM_ID}"
create_feature_group "orders" "$ORDERS_FEATURE_GROUP_NAME" '[
  {"FeatureName": "customer_id", "FeatureType": "Integral"},
  {"FeatureName": "order_id", "FeatureType": "String"},
  {"FeatureName": "order_date", "FeatureType": "String"},
  {"FeatureName": "product", "FeatureType": "String"},
  {"FeatureName": "quantity", "FeatureType": "Integral"},
  {"FeatureName": "amount", "FeatureType": "Fractional"},
  {"FeatureName": "EventTime", "FeatureType": "Fractional"}
]'

# Wait for feature groups to be created
wait_for_feature_group "$CUSTOMERS_FEATURE_GROUP_NAME"
wait_for_feature_group "$ORDERS_FEATURE_GROUP_NAME"

# Ingest data into feature groups
echo "Ingesting data into feature groups..."

# Every sample record carries the same event time (script start, epoch seconds).
EVENT_TIME_FEATURE="{\"FeatureName\": \"EventTime\", \"ValueAsString\": \"${CURRENT_TIME}\"}"

# Ingest customer data
echo "Ingesting customer data..."
put_record "customer 1" "$CUSTOMERS_FEATURE_GROUP_NAME" '[
  {"FeatureName": "customer_id", "ValueAsString": "573291"},
  {"FeatureName": "name", "ValueAsString": "John Doe"},
  {"FeatureName": "age", "ValueAsString": "35"},
  {"FeatureName": "address", "ValueAsString": "123 Main St"},
  {"FeatureName": "membership_type", "ValueAsString": "premium"},
  '"${EVENT_TIME_FEATURE}"'
]'
put_record "customer 2" "$CUSTOMERS_FEATURE_GROUP_NAME" '[
  {"FeatureName": "customer_id", "ValueAsString": "109382"},
  {"FeatureName": "name", "ValueAsString": "Jane Smith"},
  {"FeatureName": "age", "ValueAsString": "28"},
  {"FeatureName": "address", "ValueAsString": "456 Oak Ave"},
  {"FeatureName": "membership_type", "ValueAsString": "standard"},
  '"${EVENT_TIME_FEATURE}"'
]'

# Ingest order data
echo "Ingesting order data..."
put_record "order 1" "$ORDERS_FEATURE_GROUP_NAME" '[
  {"FeatureName": "customer_id", "ValueAsString": "573291"},
  {"FeatureName": "order_id", "ValueAsString": "ORD-001"},
  {"FeatureName": "order_date", "ValueAsString": "2023-01-15"},
  {"FeatureName": "product", "ValueAsString": "Laptop"},
  {"FeatureName": "quantity", "ValueAsString": "1"},
  {"FeatureName": "amount", "ValueAsString": "1299.99"},
  '"${EVENT_TIME_FEATURE}"'
]'
put_record "order 2" "$ORDERS_FEATURE_GROUP_NAME" '[
  {"FeatureName": "customer_id", "ValueAsString": "109382"},
  {"FeatureName": "order_id", "ValueAsString": "ORD-002"},
  {"FeatureName": "order_date", "ValueAsString": "2023-01-20"},
  {"FeatureName": "product", "ValueAsString": "Smartphone"},
  {"FeatureName": "quantity", "ValueAsString": "1"},
  {"FeatureName": "amount", "ValueAsString": "899.99"},
  '"${EVENT_TIME_FEATURE}"'
]'

# Retrieve records from feature groups
echo "Retrieving records from feature groups..."

# Get a single customer record
echo "Getting customer record with ID 573291:"
run_step "get customer record" \
  aws sagemaker-featurestore-runtime get-record \
  --feature-group-name "$CUSTOMERS_FEATURE_GROUP_NAME" \
  --record-identifier-value-as-string "573291"
echo "$STEP_OUTPUT"

# Get multiple records using batch-get-record
echo "Getting multiple records using batch-get-record:"
BATCH_IDENTIFIERS=$(printf \
  '[{"FeatureGroupName": "%s", "RecordIdentifiersValueAsString": ["573291", "109382"]}, {"FeatureGroupName": "%s", "RecordIdentifiersValueAsString": ["573291", "109382"]}]' \
  "$CUSTOMERS_FEATURE_GROUP_NAME" "$ORDERS_FEATURE_GROUP_NAME")
run_step "get batch records" \
  aws sagemaker-featurestore-runtime batch-get-record \
  --identifiers "$BATCH_IDENTIFIERS"
echo "$STEP_OUTPUT"

# List feature groups
echo "Listing feature groups:"
run_step "list feature groups" aws sagemaker list-feature-groups
echo "$STEP_OUTPUT"

# Display summary of created resources
echo ""
echo "==========================================="
echo "TUTORIAL COMPLETED SUCCESSFULLY!"
echo "==========================================="
echo "Resources created:"
echo "- S3 Bucket: $S3_BUCKET_NAME"
echo "- Customers Feature Group: $CUSTOMERS_FEATURE_GROUP_NAME"
echo "- Orders Feature Group: $ORDERS_FEATURE_GROUP_NAME"
if [ -n "$CREATED_ROLE_NAME" ]; then
  echo "- IAM Role: $CREATED_ROLE_NAME"
fi
echo ""
echo "You can now:"
echo "1. View your feature groups in the SageMaker console"
echo "2. Query the offline store using Amazon Athena"
echo "3. Use the feature groups in your ML workflows"
echo "==========================================="
echo ""

# Handle cleanup
if [ "$AUTO_CLEANUP" = "y" ]; then
  echo "Auto-cleanup enabled. Starting cleanup..."
  cleanup_resources
  echo "Cleanup completed."
elif [ "$AUTO_CLEANUP" = "n" ]; then
  echo "Auto-cleanup disabled. Resources will remain in your account."
  # Re-running the script would create NEW resources, so list what to delete.
  print_manual_cleanup_hint
else
  echo "==========================================="
  echo "CLEANUP CONFIRMATION"
  echo "==========================================="
  echo "Do you want to clean up all created resources? (y/n): "
  read -r CLEANUP_CHOICE
  if [[ "$CLEANUP_CHOICE" =~ ^[Yy]$ ]]; then
    echo "Starting cleanup..."
    cleanup_resources
    echo "Cleanup completed."
  else
    echo "Skipping cleanup. Resources will remain in your account."
    print_manual_cleanup_hint
  fi
fi

echo "Script completed at $(date)"
-
For API details, see the following topics in AWS CLI Command Reference.
-
Getting started with Amazon S3
Getting started with Amazon Textract