There are more AWS SDK examples available in the AWS Doc SDK Examples GitHub repo.
Getting started with Amazon OpenSearch Service
The following code example shows how to:
- Create an OpenSearch Service domain
- Upload data to your domain
- Clean up resources
- Bash
-
- AWS CLI with Bash script
-
Note
There's more on GitHub. Find the complete example and learn how to set up and run in the Sample developer tutorials
repository.
#!/bin/bash
# Amazon OpenSearch Service Getting Started Script - Version 8 Fixed
# This script creates an OpenSearch domain, uploads data, searches documents,
# and cleans up resources.
# Based on the tested and working 4-tutorial-final.md
#
# FIXES IN V8-FIXED:
# 1. Fixed syntax error with regex pattern matching
# 2. Fixed access policy to be more permissive and work with fine-grained access control
# 3. Added proper resource-based policy that allows both IAM and internal user database access
# 4. Improved authentication test with better error handling
# 5. Better debugging and troubleshooting information

set -e # Exit on any error

# Set up logging: everything written to stdout/stderr is also appended to LOG_FILE.
LOG_FILE="opensearch_tutorial_v8_fixed.log"
exec > >(tee -a "$LOG_FILE") 2>&1

echo "Starting Amazon OpenSearch Service tutorial script v8-fixed at $(date)"
echo "All commands and outputs will be logged to $LOG_FILE"

# Track if domain was successfully created
DOMAIN_CREATED=false
DOMAIN_ACTIVE=false

#######################################
# Report a fatal error, clean up, and exit with status 1.
# Arguments: $1 - message to print after "ERROR: "
#######################################
handle_error() {
  echo "ERROR: $1"
  echo "Attempting to clean up resources..."
  # Cleanup is performed right here, so remove the EXIT trap first; otherwise
  # `exit 1` would run cleanup_resources a second time.
  trap - EXIT
  cleanup_resources
  exit 1
}

#######################################
# Delete the tutorial domain if this script created it.
# Globals: DOMAIN_CREATED (read), DOMAIN_NAME (read)
#######################################
cleanup_resources() {
  echo "Cleaning up resources..."
  if [[ "$DOMAIN_CREATED" == "true" ]]; then
    echo "Checking if domain $DOMAIN_NAME exists before attempting to delete..."
    # Check if domain exists before trying to delete
    if aws opensearch describe-domain --domain-name "$DOMAIN_NAME" > /dev/null 2>&1; then
      echo "Domain $DOMAIN_NAME exists. Proceeding with deletion."
      # Run the delete inside `if` so a failure is reported instead of
      # letting `set -e` abort the cleanup half-way.
      if aws opensearch delete-domain --domain-name "$DOMAIN_NAME"; then
        echo "Domain deletion initiated. This may take several minutes to complete."
      else
        echo "WARNING: Failed to initiate deletion of domain $DOMAIN_NAME."
        echo "Delete it manually with: aws opensearch delete-domain --domain-name $DOMAIN_NAME"
      fi
    else
      echo "Domain $DOMAIN_NAME does not exist or is not accessible. No deletion needed."
    fi
  else
    echo "No domain was successfully created. Nothing to clean up."
  fi
}

# Set up trap for cleanup on script exit
trap cleanup_resources EXIT

# Generate a random identifier for resource names to avoid conflicts
RANDOM_ID=$(openssl rand -hex 4)
DOMAIN_NAME="movies-${RANDOM_ID}"
MASTER_USER="master-user"
# NOTE: hard-coded tutorial credential; it is echoed and logged below.
# Do not reuse this password outside of the tutorial.
MASTER_PASSWORD='Master-Password123!'

echo "Using domain name: $DOMAIN_NAME"
echo "Using master username: $MASTER_USER"
echo "Using master password: $MASTER_PASSWORD"

# Get AWS account ID (matches tutorial)
echo "Retrieving AWS account ID..."
# The call sits inside the `if` condition: with `set -e`, a plain assignment
# followed by a `$?` test would exit before the test could ever run.
if ! ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) \
  || [[ -z "$ACCOUNT_ID" ]]; then
  handle_error "Failed to retrieve AWS account ID. Please check your AWS credentials."
fi
echo "AWS Account ID: $ACCOUNT_ID"

# Get current region (matches tutorial)
echo "Retrieving current AWS region..."
# `aws configure get` exits with status 1 when the value is not configured;
# `|| true` keeps `set -e` from aborting before the default can be applied.
AWS_REGION=$(aws configure get region || true)
if [[ -z "$AWS_REGION" ]]; then
  AWS_REGION="us-east-1"
  # Export the fallback so the AWS CLI calls below target the same region
  # that is embedded in the access policy ARN.
  export AWS_REGION
  export AWS_DEFAULT_REGION="$AWS_REGION"
  echo "No region found in AWS config, defaulting to $AWS_REGION"
else
  echo "Using AWS region: $AWS_REGION"
fi

# Step 1: Create an OpenSearch Service Domain
echo "Creating OpenSearch Service domain..."
echo "This may take 15-30 minutes to complete."

# FIXED: Create a more permissive access policy that works with fine-grained access control
# This policy allows both IAM users and the internal user database to work
ACCESS_POLICY="{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Principal\":{\"AWS\":\"*\"},\"Action\":[\"es:ESHttpGet\",\"es:ESHttpPut\",\"es:ESHttpPost\",\"es:ESHttpDelete\",\"es:ESHttpHead\"],\"Resource\":\"arn:aws:es:${AWS_REGION}:${ACCOUNT_ID}:domain/${DOMAIN_NAME}/*\"}]}"

echo "Access policy created for region: $AWS_REGION"
echo "Access policy: $ACCESS_POLICY"

# Create the domain (matches tutorial command exactly)
echo "Creating domain $DOMAIN_NAME..."
# Create the domain. The command runs inside the `if` condition so that, with
# `set -e` active, a failure reaches the error handler instead of silently
# terminating the script. `--output json` pins the format that the
# "DomainStatus" check below relies on, regardless of the user's CLI config.
if ! CREATE_OUTPUT=$(aws opensearch create-domain \
  --domain-name "$DOMAIN_NAME" \
  --engine-version "OpenSearch_2.11" \
  --cluster-config "InstanceType=t3.small.search,InstanceCount=1,ZoneAwarenessEnabled=false" \
  --ebs-options "EBSEnabled=true,VolumeType=gp3,VolumeSize=10" \
  --node-to-node-encryption-options "Enabled=true" \
  --encryption-at-rest-options "Enabled=true" \
  --domain-endpoint-options "EnforceHTTPS=true" \
  --advanced-security-options "Enabled=true,InternalUserDatabaseEnabled=true,MasterUserOptions={MasterUserName=$MASTER_USER,MasterUserPassword=$MASTER_PASSWORD}" \
  --access-policies "$ACCESS_POLICY" \
  --output json 2>&1); then
  echo "Failed to create OpenSearch domain:"
  echo "$CREATE_OUTPUT"
  handle_error "Domain creation failed"
fi

# Verify the domain was actually created by checking the output
if grep -q "DomainStatus" <<<"$CREATE_OUTPUT"; then
  echo "Domain creation initiated successfully."
  DOMAIN_CREATED=true
else
  echo "Domain creation output:"
  echo "$CREATE_OUTPUT"
  handle_error "Domain creation may have failed - no DomainStatus in response"
fi

# Wait for domain to become active (improved logic)
echo "Waiting for domain to become active..."
RETRY_COUNT=0
MAX_RETRIES=45 # 45 minutes with 60 second intervals

while [[ $RETRY_COUNT -lt $MAX_RETRIES ]]; do
  echo "Checking domain status... (attempt $((RETRY_COUNT+1))/$MAX_RETRIES)"

  # Get domain status. Keeping the call in the `if` condition lets a transient
  # CLI failure fall through to the retry path instead of tripping `set -e`.
  if ! DOMAIN_STATUS=$(aws opensearch describe-domain \
    --domain-name "$DOMAIN_NAME" --output json 2>&1); then
    echo "Error checking domain status:"
    echo "$DOMAIN_STATUS"
    # If domain not found after several attempts, it likely failed to create
    if [[ $RETRY_COUNT -gt 5 ]] \
      && grep -q "ResourceNotFoundException" <<<"$DOMAIN_STATUS"; then
      handle_error "Domain not found after multiple attempts. Domain creation likely failed."
    fi
    echo "Will retry in 60 seconds..."
  # Check if domain is no longer processing (whitespace-tolerant JSON match)
  elif grep -Eq '"Processing"[[:space:]]*:[[:space:]]*false' <<<"$DOMAIN_STATUS"; then
    DOMAIN_ACTIVE=true
    echo "Domain is now active!"
    break
  else
    echo "Domain is still being created. Checking again in 60 seconds..."
  fi

  sleep 60
  RETRY_COUNT=$((RETRY_COUNT+1))
done

# Verify domain is active
if [[ "$DOMAIN_ACTIVE" != "true" ]]; then
  echo "Domain creation is taking longer than expected ($((MAX_RETRIES)) minutes)."
  echo "You can check the status later using:"
  echo "aws opensearch describe-domain --domain-name $DOMAIN_NAME"
  handle_error "Domain did not become active within the expected time"
fi

# Get domain endpoint (matches tutorial)
echo "Retrieving domain endpoint..."
# With --output text a missing endpoint is printed as the literal "None".
if ! DOMAIN_ENDPOINT=$(aws opensearch describe-domain \
  --domain-name "$DOMAIN_NAME" \
  --query 'DomainStatus.Endpoint' --output text) \
  || [[ -z "$DOMAIN_ENDPOINT" ]] \
  || [[ "$DOMAIN_ENDPOINT" == "None" ]]; then
  handle_error "Failed to get domain endpoint"
fi
echo "Domain endpoint: $DOMAIN_ENDPOINT"

# Wait additional time for fine-grained access control to be fully ready
echo "Domain is active, but waiting additional time for fine-grained access control to be fully ready..."
echo "Fine-grained access control can take several minutes to initialize after domain becomes active."
echo "Waiting 8 minutes for full initialization..."
sleep 480 # Wait 8 minutes for fine-grained access control to be ready

# Verify variables are set correctly (matches tutorial)
echo "Verifying configuration..."
echo "Domain endpoint: $DOMAIN_ENDPOINT"
echo "Master user: $MASTER_USER"
echo "Password set: $(if [ -n "$MASTER_PASSWORD" ]; then echo "Yes"; else echo "No"; fi)"

# Step 2: Upload Data to the Domain
echo "Preparing to upload data to the domain..."

# Create a file for the single document (matches tutorial exactly)
echo "Creating single document JSON file..."
# The heredoc delimiters are quoted so the JSON is written verbatim, with no
# shell expansion applied to its contents.
cat > single_movie.json << 'EOF'
{
  "director": "Burton, Tim",
  "genre": ["Comedy","Sci-Fi"],
  "year": 1996,
  "actor": ["Jack Nicholson","Pierce Brosnan","Sarah Jessica Parker"],
  "title": "Mars Attacks!"
}
EOF

# Create a file for bulk documents (matches tutorial exactly)
echo "Creating bulk documents JSON file..."
cat > bulk_movies.json << 'EOF'
{ "index" : { "_index": "movies", "_id" : "2" } }
{"director": "Frankenheimer, John", "genre": ["Drama", "Mystery", "Thriller", "Crime"], "year": 1962, "actor": ["Lansbury, Angela", "Sinatra, Frank", "Leigh, Janet", "Harvey, Laurence", "Silva, Henry", "Frees, Paul", "Gregory, James", "Bissell, Whit", "McGiver, John", "Parrish, Leslie", "Edwards, James", "Flowers, Bess", "Dhiegh, Khigh", "Payne, Julie", "Kleeb, Helen", "Gray, Joe", "Nalder, Reggie", "Stevens, Bert", "Masters, Michael", "Lowell, Tom"], "title": "The Manchurian Candidate"}
{ "index" : { "_index": "movies", "_id" : "3" } }
{"director": "Baird, Stuart", "genre": ["Action", "Crime", "Thriller"], "year": 1998, "actor": ["Downey Jr., Robert", "Jones, Tommy Lee", "Snipes, Wesley", "Pantoliano, Joe", "Jacob, Irène", "Nelligan, Kate", "Roebuck, Daniel", "Malahide, Patrick", "Richardson, LaTanya", "Wood, Tom", "Kosik, Thomas", "Stellate, Nick", "Minkoff, Robert", "Brown, Spitfire", "Foster, Reese", "Spielbauer, Bruce", "Mukherji, Kevin", "Cray, Ed", "Fordham, David", "Jett, Charlie"], "title": "U.S. Marshals"}
{ "index" : { "_index": "movies", "_id" : "4" } }
{"director": "Ray, Nicholas", "genre": ["Drama", "Romance"], "year": 1955, "actor": ["Hopper, Dennis", "Wood, Natalie", "Dean, James", "Mineo, Sal", "Backus, Jim", "Platt, Edward", "Ray, Nicholas", "Hopper, William", "Allen, Corey", "Birch, Paul", "Hudson, Rochelle", "Doran, Ann", "Hicks, Chuck", "Leigh, Nelson", "Williams, Robert", "Wessel, Dick", "Bryar, Paul", "Sessions, Almira", "McMahon, David", "Peters Jr., House"], "title": "Rebel Without a Cause"}
EOF

#######################################
# Send a request to the domain as the master user.
# Prints the response body followed by a final "HTTP_CODE:<status>" line.
# A curl transport failure (DNS, TLS, connection refused) is tolerated: curl
# then reports status 000, and the `|| true` stops `set -e` from aborting the
# script, so callers can inspect the code and retry.
# NOTE: the password is passed on the command line (tutorial only); it is
# visible to other local users via the process list.
# Arguments: extra curl options, ending with the URL
#######################################
opensearch_request() {
  curl -s -w "\nHTTP_CODE:%{http_code}" \
    --user "${MASTER_USER}:${MASTER_PASSWORD}" \
    "$@" 2>&1 || true
}

#######################################
# Extract the status code from opensearch_request output.
# Arguments: $1 - full response text
# Outputs:   the status code, or nothing when no HTTP_CODE line is present
#######################################
extract_http_code() {
  grep "HTTP_CODE:" <<<"$1" | cut -d: -f2 || true
}

#######################################
# Function to check if HTTP code is 2xx
# Arguments: $1 - HTTP status code
# Returns:   0 for 200-299, 1 otherwise (including empty input)
#######################################
is_success_code() {
  [[ "$1" =~ ^2[0-9][0-9]$ ]]
}

# Check if curl is installed
if ! command -v curl &> /dev/null; then
  echo "Warning: curl is not installed. Skipping data upload and search steps."
  echo "You can manually upload the data later using the commands in the tutorial."
else
  # IMPROVED: Test authentication with multiple approaches
  echo "Testing authentication with the OpenSearch domain..."
  echo "This test checks if fine-grained access control is ready for data operations."

  # Start from a known state so the check further down never reads an unset value.
  AUTH_SUCCESS=false
  AUTH_METHOD=""

  # Test 1: Basic authentication with root endpoint
  echo "Testing basic authentication with root endpoint..."
  AUTH_TEST_RESULT=$(opensearch_request --request GET "https://${DOMAIN_ENDPOINT}/")
  echo "Basic auth test result:"
  echo "$AUTH_TEST_RESULT"

  # Extract HTTP status code
  HTTP_CODE=$(extract_http_code "$AUTH_TEST_RESULT")

  # Check if basic authentication test was successful (200 or 2xx status codes)
  if is_success_code "$HTTP_CODE"; then
    echo "✓ Basic authentication test successful! (HTTP $HTTP_CODE)"
    AUTH_SUCCESS=true
    AUTH_METHOD="basic"
  else
    echo "Basic authentication failed with HTTP code: $HTTP_CODE"

    # Test 2: Try cluster health endpoint
    echo "Testing with cluster health endpoint..."
    HEALTH_TEST_RESULT=$(opensearch_request --request GET "https://${DOMAIN_ENDPOINT}/_cluster/health")
    echo "Cluster health test result:"
    echo "$HEALTH_TEST_RESULT"
    HEALTH_HTTP_CODE=$(extract_http_code "$HEALTH_TEST_RESULT")

    if is_success_code "$HEALTH_HTTP_CODE"; then
      echo "✓ Cluster health authentication test successful! (HTTP $HEALTH_HTTP_CODE)"
      AUTH_SUCCESS=true
      AUTH_METHOD="basic"
    else
      echo "Cluster health authentication also failed with HTTP code: $HEALTH_HTTP_CODE"

      # Check for specific error patterns
      if grep -q "anonymous is not authorized" <<<"$AUTH_TEST_RESULT"; then
        echo "Error: Request is being treated as anonymous (authentication not working)"
      elif grep -q "Unauthorized" <<<"$AUTH_TEST_RESULT"; then
        echo "Error: Authentication credentials rejected"
      elif grep -q "Forbidden" <<<"$AUTH_TEST_RESULT"; then
        echo "Error: Authentication succeeded but access is forbidden"
      fi

      echo "Waiting additional time and retrying with exponential backoff..."

      # Retry authentication test with exponential backoff
      AUTH_RETRY_COUNT=0
      MAX_AUTH_RETRIES=5
      WAIT_TIME=60

      while [[ $AUTH_RETRY_COUNT -lt $MAX_AUTH_RETRIES ]]; do
        echo "Retrying authentication test (attempt $((AUTH_RETRY_COUNT+1))/$MAX_AUTH_RETRIES) after ${WAIT_TIME} seconds..."
        sleep "$WAIT_TIME"

        # Try both endpoints
        AUTH_TEST_RESULT=$(opensearch_request --request GET "https://${DOMAIN_ENDPOINT}/")
        HTTP_CODE=$(extract_http_code "$AUTH_TEST_RESULT")
        echo "Retry result (HTTP $HTTP_CODE):"
        echo "$AUTH_TEST_RESULT"

        if is_success_code "$HTTP_CODE"; then
          echo "✓ Authentication test successful after retry! (HTTP $HTTP_CODE)"
          AUTH_SUCCESS=true
          AUTH_METHOD="basic"
          break
        fi

        # Also try cluster health
        HEALTH_TEST_RESULT=$(opensearch_request --request GET "https://${DOMAIN_ENDPOINT}/_cluster/health")
        HEALTH_HTTP_CODE=$(extract_http_code "$HEALTH_TEST_RESULT")

        if is_success_code "$HEALTH_HTTP_CODE"; then
          echo "✓ Cluster health authentication successful after retry! (HTTP $HEALTH_HTTP_CODE)"
          AUTH_SUCCESS=true
          AUTH_METHOD="basic"
          break
        fi

        AUTH_RETRY_COUNT=$((AUTH_RETRY_COUNT+1))
        # Exponential backoff: double the wait time each retry (max 10 minutes)
        WAIT_TIME=$((WAIT_TIME * 2))
        if [[ $WAIT_TIME -gt 600 ]]; then
          WAIT_TIME=600
        fi
      done
    fi
  fi

  # Proceed with data operations if authentication is working
  if [[ "$AUTH_SUCCESS" == "true" ]]; then
    echo "Authentication successful using $AUTH_METHOD method. Proceeding with data operations."

    # Upload single document (matches tutorial exactly)
    echo "Uploading single document..."
    UPLOAD_RESULT=$(opensearch_request \
      --request PUT \
      --header 'Content-Type: application/json' \
      --data @single_movie.json \
      "https://${DOMAIN_ENDPOINT}/movies/_doc/1")
    echo "Upload response:"
    echo "$UPLOAD_RESULT"
    UPLOAD_HTTP_CODE=$(extract_http_code "$UPLOAD_RESULT")

    if is_success_code "$UPLOAD_HTTP_CODE" && grep -q '"result"' <<<"$UPLOAD_RESULT"; then
      echo "✓ Single document uploaded successfully! (HTTP $UPLOAD_HTTP_CODE)"
    else
      echo "⚠ Warning: Single document upload may have failed (HTTP $UPLOAD_HTTP_CODE)"
    fi

    # Upload bulk documents (matches tutorial exactly)
    echo "Uploading bulk documents..."
    BULK_RESULT=$(opensearch_request \
      --request POST \
      --header 'Content-Type: application/x-ndjson' \
      --data-binary @bulk_movies.json \
      "https://${DOMAIN_ENDPOINT}/movies/_bulk")
    echo "Bulk upload response:"
    echo "$BULK_RESULT"
    BULK_HTTP_CODE=$(extract_http_code "$BULK_RESULT")

    # The _bulk response is compact JSON ("errors":false), so the match must
    # tolerate the absence of whitespace around the colon.
    if is_success_code "$BULK_HTTP_CODE" \
      && grep -Eq '"errors"[[:space:]]*:[[:space:]]*false' <<<"$BULK_RESULT"; then
      echo "✓ Bulk documents uploaded successfully! (HTTP $BULK_HTTP_CODE)"
    else
      echo "⚠ Warning: Bulk document upload may have failed (HTTP $BULK_HTTP_CODE)"
    fi

    # Wait a moment for indexing
    echo "Waiting for documents to be indexed..."
    sleep 5

    # Step 3: Search Documents (matches tutorial exactly)
    echo "Searching for documents containing 'mars'..."
    SEARCH_RESULT=$(opensearch_request --request GET \
      "https://${DOMAIN_ENDPOINT}/movies/_search?q=mars&pretty=true")
    SEARCH_HTTP_CODE=$(extract_http_code "$SEARCH_RESULT")
    echo "Search results for 'mars' (HTTP $SEARCH_HTTP_CODE):"
    echo "$SEARCH_RESULT"

    echo "Searching for documents containing 'rebel'..."
    REBEL_SEARCH=$(opensearch_request --request GET \
      "https://${DOMAIN_ENDPOINT}/movies/_search?q=rebel&pretty=true")
    REBEL_HTTP_CODE=$(extract_http_code "$REBEL_SEARCH")
    echo "Search results for 'rebel' (HTTP $REBEL_HTTP_CODE):"
    echo "$REBEL_SEARCH"

    # Verify search results
    if is_success_code "$SEARCH_HTTP_CODE" && grep -q '"hits"' <<<"$SEARCH_RESULT"; then
      echo "✓ Search functionality is working!"
    else
      echo "⚠ Warning: Search may not be working properly."
    fi
  else
    echo ""
    echo "=========================================="
    echo "AUTHENTICATION TROUBLESHOOTING"
    echo "=========================================="
    echo "Authentication failed after all retries. This may be due to:"
    echo "1. Fine-grained access control not fully initialized (most common)"
    echo "2. Domain configuration issues"
    echo "3. Network connectivity issues"
    echo "4. AWS credentials or permissions issues"
    echo ""
    echo "DOMAIN CONFIGURATION DEBUG:"
    echo "Let's check the domain configuration..."

    # Debug domain configuration. This is best-effort diagnostics, so a CLI
    # failure here must not abort the script (`|| true` neutralizes `set -e`).
    DOMAIN_CONFIG=$(aws opensearch describe-domain --domain-name "$DOMAIN_NAME" --query 'DomainStatus.{AdvancedSecurityOptions: AdvancedSecurityOptions, AccessPolicies: AccessPolicies}' --output json 2>&1) || true
    echo "Domain configuration:"
    echo "$DOMAIN_CONFIG"

    echo ""
    echo "MANUAL TESTING COMMANDS:"
    echo "You can try these commands manually in 10-15 minutes:"
    echo ""
    echo "# Test basic authentication:"
    echo "curl --user \"${MASTER_USER}:${MASTER_PASSWORD}\" \"https://${DOMAIN_ENDPOINT}/\""
    echo ""
    echo "# Test cluster health:"
    echo "curl --user \"${MASTER_USER}:${MASTER_PASSWORD}\" \"https://${DOMAIN_ENDPOINT}/_cluster/health\""
    echo ""
    echo "# Upload single document:"
    echo "curl --user \"${MASTER_USER}:${MASTER_PASSWORD}\" --request PUT --header 'Content-Type: application/json' --data @single_movie.json \"https://${DOMAIN_ENDPOINT}/movies/_doc/1\""
    echo ""
    echo "# Search for documents:"
    echo "curl --user \"${MASTER_USER}:${MASTER_PASSWORD}\" \"https://${DOMAIN_ENDPOINT}/movies/_search?q=mars&pretty=true\""
    echo ""
    echo "TROUBLESHOOTING TIPS:"
    echo "- Wait 10-15 more minutes and try the manual commands"
    echo "- Check AWS CloudTrail logs for authentication errors"
    echo "- Verify your AWS region is correct: $AWS_REGION"
    echo "- Ensure your AWS credentials have OpenSearch permissions"
    echo "- Try accessing OpenSearch Dashboards to verify the master user works"
    echo ""
    echo "Skipping data upload and search operations for now."
    echo "The domain is created and accessible via OpenSearch Dashboards."
  fi
fi

# Display OpenSearch Dashboards URL (matches tutorial)
echo ""
echo "==========================================="
echo "OPENSEARCH DASHBOARDS ACCESS"
echo "==========================================="
echo "OpenSearch Dashboards URL: https://${DOMAIN_ENDPOINT}/_dashboards/"
echo "Username: $MASTER_USER"
echo "Password: $MASTER_PASSWORD"
echo ""
echo "You can access OpenSearch Dashboards using these credentials."
echo "If you uploaded data successfully, you can create an index pattern for 'movies'."
echo ""

# Summary of created resources
echo ""
echo "==========================================="
echo "RESOURCES CREATED"
echo "==========================================="
echo "OpenSearch Domain Name: $DOMAIN_NAME"
echo "OpenSearch Domain Endpoint: $DOMAIN_ENDPOINT"
echo "AWS Region: $AWS_REGION"
echo "Master Username: $MASTER_USER"
echo "Master Password: $MASTER_PASSWORD"
echo ""
# The dollar signs are escaped: an unescaped "$0" inside double quotes expands
# to the script name and would corrupt the printed price.
echo "ESTIMATED COST: ~\$0.038/hour (~\$0.91/day) until deleted"
echo ""
echo "Make sure to save these details for future reference."
echo ""

# Ask user if they want to clean up resources
echo ""
echo "==========================================="
echo "CLEANUP CONFIRMATION"
echo "==========================================="
echo "Do you want to clean up all created resources now? (y/n): "
read -r CLEANUP_CHOICE

# Accept "y" or "Y" using a glob match instead of the Bash 4+ `${var,,}`
# expansion. On Bash 3.2 (the stock macOS shell) that expansion is a fatal
# "bad substitution" error, which would fire the EXIT trap and delete the
# domain no matter what the user answered.
if [[ "$CLEANUP_CHOICE" == [yY] ]]; then
  echo "Cleaning up resources..."
  aws opensearch delete-domain --domain-name "$DOMAIN_NAME"
  echo "✓ Cleanup initiated. Domain deletion may take several minutes to complete."
  echo ""
  echo "You can check the deletion status using:"
  echo "aws opensearch describe-domain --domain-name $DOMAIN_NAME"
  echo ""
  echo "When deletion is complete, you'll see a 'Domain not found' error."
else
  echo "Resources will NOT be deleted automatically."
  echo ""
  echo "To delete the domain later, use:"
  echo "aws opensearch delete-domain --domain-name $DOMAIN_NAME"
  echo ""
  echo "⚠ IMPORTANT: Keeping these resources will incur ongoing AWS charges!"
  echo " Estimated cost: ~\$0.038/hour (~\$0.91/day)"
fi

# Clean up temporary files
echo "Cleaning up temporary files..."
rm -f single_movie.json bulk_movies.json

# Disable the trap since we're handling cleanup manually
trap - EXIT

echo ""
echo "==========================================="
echo "SCRIPT COMPLETED SUCCESSFULLY"
echo "==========================================="
echo "Script completed at $(date)"
echo "All output has been logged to: $LOG_FILE"
echo ""
echo "Next steps:"
echo "1. Access OpenSearch Dashboards at: https://${DOMAIN_ENDPOINT}/_dashboards/"
echo "2. Create visualizations and dashboards"
echo "3. Explore the OpenSearch API"
echo "4. Remember to delete resources when done to avoid charges"
-
For API details, see the following topics in AWS CLI Command Reference.
-
Scenarios
EventBridge