There are more AWS SDK examples available in the AWS Doc SDK Examples
Getting started with Amazon Athena
The following code example shows how to:
Create an S3 bucket for query results
Create a database
Create a table
Run a query
Create and use named queries
Clean up resources
- Bash
-
- AWS CLI with Bash script
-
Note
There's more on GitHub. Find the complete example and learn how to set up and run in the Sample developer tutorials
repository. #!/bin/bash # Amazon Athena Getting Started Script # This script demonstrates how to use Amazon Athena with AWS CLI # It creates a database, table, runs queries, and manages named queries # Set up logging LOG_FILE="athena-tutorial.log" exec > >(tee -a "$LOG_FILE") 2>&1 echo "Starting Amazon Athena Getting Started Tutorial..." echo "Logging to $LOG_FILE" # Function to handle errors handle_error() { echo "ERROR: $1" echo "Resources created:" if [ -n "$NAMED_QUERY_ID" ]; then echo "- Named Query: $NAMED_QUERY_ID" fi if [ -n "$DATABASE_NAME" ]; then echo "- Database: $DATABASE_NAME" if [ -n "$TABLE_NAME" ]; then echo "- Table: $TABLE_NAME in $DATABASE_NAME" fi fi if [ -n "$S3_BUCKET" ]; then echo "- S3 Bucket: $S3_BUCKET" fi echo "Exiting..." exit 1 } # Generate a random identifier for S3 bucket RANDOM_ID=$(openssl rand -hex 6) S3_BUCKET="athena-${RANDOM_ID}" DATABASE_NAME="mydatabase" TABLE_NAME="cloudfront_logs" # Get the current AWS region AWS_REGION=$(aws configure get region) if [ -z "$AWS_REGION" ]; then AWS_REGION="us-east-1" echo "No AWS region found in configuration, defaulting to $AWS_REGION" fi echo "Using AWS Region: $AWS_REGION" # Create S3 bucket for Athena query results echo "Creating S3 bucket for Athena query results: $S3_BUCKET" CREATE_BUCKET_RESULT=$(aws s3 mb "s3://$S3_BUCKET" 2>&1) if echo "$CREATE_BUCKET_RESULT" | grep -i "error"; then handle_error "Failed to create S3 bucket: $CREATE_BUCKET_RESULT" fi echo "$CREATE_BUCKET_RESULT" # Step 1: Create a database echo "Step 1: Creating Athena database: $DATABASE_NAME" CREATE_DB_RESULT=$(aws athena start-query-execution \ --query-string "CREATE DATABASE IF NOT EXISTS $DATABASE_NAME" \ --result-configuration "OutputLocation=s3://$S3_BUCKET/output/" 2>&1) if echo "$CREATE_DB_RESULT" | grep -i "error"; then handle_error "Failed to create database: $CREATE_DB_RESULT" fi QUERY_ID=$(echo "$CREATE_DB_RESULT" | grep -o '"QueryExecutionId": "[^"]*' | cut -d'"' -f4) echo "Database creation query ID: $QUERY_ID" # Wait for database creation to complete echo "Waiting for database creation to complete..." while true; do QUERY_STATUS=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.Status.State" --output text 2>&1) if [ "$QUERY_STATUS" = "SUCCEEDED" ]; then echo "Database creation completed successfully." break elif [ "$QUERY_STATUS" = "FAILED" ] || [ "$QUERY_STATUS" = "CANCELLED" ]; then handle_error "Database creation failed with status: $QUERY_STATUS" fi echo "Database creation in progress, status: $QUERY_STATUS" sleep 2 done # Verify the database was created echo "Verifying database creation..." LIST_DB_RESULT=$(aws athena list-databases --catalog-name AwsDataCatalog 2>&1) if echo "$LIST_DB_RESULT" | grep -i "error"; then handle_error "Failed to list databases: $LIST_DB_RESULT" fi echo "$LIST_DB_RESULT" # Step 2: Create a table echo "Step 2: Creating Athena table: $TABLE_NAME" # Replace the region placeholder in the S3 location CREATE_TABLE_QUERY="CREATE EXTERNAL TABLE IF NOT EXISTS $DATABASE_NAME.$TABLE_NAME ( \`Date\` DATE, Time STRING, Location STRING, Bytes INT, RequestIP STRING, Method STRING, Host STRING, Uri STRING, Status INT, Referrer STRING, os STRING, Browser STRING, BrowserVersion STRING ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH SERDEPROPERTIES ( \"input.regex\" = \"^(?!#)([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+([^ ]+)\\\\s+[^\\\\(]+[\\\\(]([^\\\\;]+).*\\\\%20([^\\\\/]+)[\\\\/](.*)$\" ) LOCATION 's3://athena-examples-us-east-1/cloudfront/plaintext/';" CREATE_TABLE_RESULT=$(aws athena start-query-execution \ --query-string "$CREATE_TABLE_QUERY" \ --result-configuration "OutputLocation=s3://$S3_BUCKET/output/" 2>&1) if echo "$CREATE_TABLE_RESULT" | grep -i "error"; then handle_error "Failed to create table: $CREATE_TABLE_RESULT" fi QUERY_ID=$(echo "$CREATE_TABLE_RESULT" | grep -o '"QueryExecutionId": "[^"]*' | cut -d'"' -f4) echo "Table creation query ID: $QUERY_ID" # Wait for table creation to complete echo "Waiting for table creation to complete..." while true; do QUERY_STATUS=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.Status.State" --output text 2>&1) if [ "$QUERY_STATUS" = "SUCCEEDED" ]; then echo "Table creation completed successfully." break elif [ "$QUERY_STATUS" = "FAILED" ] || [ "$QUERY_STATUS" = "CANCELLED" ]; then handle_error "Table creation failed with status: $QUERY_STATUS" fi echo "Table creation in progress, status: $QUERY_STATUS" sleep 2 done # Verify the table was created echo "Verifying table creation..." LIST_TABLE_RESULT=$(aws athena list-table-metadata \ --catalog-name AwsDataCatalog \ --database-name "$DATABASE_NAME" 2>&1) if echo "$LIST_TABLE_RESULT" | grep -i "error"; then handle_error "Failed to list tables: $LIST_TABLE_RESULT" fi echo "$LIST_TABLE_RESULT" # Step 3: Query data echo "Step 3: Running a query on the table..." QUERY="SELECT os, COUNT(*) count FROM $DATABASE_NAME.$TABLE_NAME WHERE date BETWEEN date '2014-07-05' AND date '2014-08-05' GROUP BY os" QUERY_RESULT=$(aws athena start-query-execution \ --query-string "$QUERY" \ --result-configuration "OutputLocation=s3://$S3_BUCKET/output/" 2>&1) if echo "$QUERY_RESULT" | grep -i "error"; then handle_error "Failed to run query: $QUERY_RESULT" fi QUERY_ID=$(echo "$QUERY_RESULT" | grep -o '"QueryExecutionId": "[^"]*' | cut -d'"' -f4) echo "Query execution ID: $QUERY_ID" # Wait for query to complete echo "Waiting for query to complete..." while true; do QUERY_STATUS=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.Status.State" --output text 2>&1) if [ "$QUERY_STATUS" = "SUCCEEDED" ]; then echo "Query completed successfully." break elif [ "$QUERY_STATUS" = "FAILED" ] || [ "$QUERY_STATUS" = "CANCELLED" ]; then handle_error "Query failed with status: $QUERY_STATUS" fi echo "Query in progress, status: $QUERY_STATUS" sleep 2 done # Get query results echo "Getting query results..." RESULTS=$(aws athena get-query-results --query-execution-id "$QUERY_ID" 2>&1) if echo "$RESULTS" | grep -i "error"; then handle_error "Failed to get query results: $RESULTS" fi echo "$RESULTS" # Download results from S3 echo "Downloading query results from S3..." S3_PATH=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.ResultConfiguration.OutputLocation" --output text 2>&1) if echo "$S3_PATH" | grep -i "error"; then handle_error "Failed to get S3 path for results: $S3_PATH" fi DOWNLOAD_RESULT=$(aws s3 cp "$S3_PATH" "./query-results.csv" 2>&1) if echo "$DOWNLOAD_RESULT" | grep -i "error"; then handle_error "Failed to download query results: $DOWNLOAD_RESULT" fi echo "Query results downloaded to query-results.csv" # Step 4: Create a named query echo "Step 4: Creating a named query..." NAMED_QUERY_RESULT=$(aws athena create-named-query \ --name "OS Count Query" \ --description "Count of operating systems in CloudFront logs" \ --database "$DATABASE_NAME" \ --query-string "$QUERY" 2>&1) if echo "$NAMED_QUERY_RESULT" | grep -i "error"; then handle_error "Failed to create named query: $NAMED_QUERY_RESULT" fi NAMED_QUERY_ID=$(echo "$NAMED_QUERY_RESULT" | grep -o '"NamedQueryId": "[^"]*' | cut -d'"' -f4) echo "Named query created with ID: $NAMED_QUERY_ID" # List named queries echo "Listing named queries..." LIST_QUERIES_RESULT=$(aws athena list-named-queries 2>&1) if echo "$LIST_QUERIES_RESULT" | grep -i "error"; then handle_error "Failed to list named queries: $LIST_QUERIES_RESULT" fi echo "$LIST_QUERIES_RESULT" # Get the named query details echo "Getting named query details..." GET_QUERY_RESULT=$(aws athena get-named-query --named-query-id "$NAMED_QUERY_ID" 2>&1) if echo "$GET_QUERY_RESULT" | grep -i "error"; then handle_error "Failed to get named query: $GET_QUERY_RESULT" fi echo "$GET_QUERY_RESULT" # Execute the named query echo "Executing the named query..." QUERY_STRING=$(aws athena get-named-query --named-query-id "$NAMED_QUERY_ID" --query "NamedQuery.QueryString" --output text 2>&1) if echo "$QUERY_STRING" | grep -i "error"; then handle_error "Failed to get query string: $QUERY_STRING" fi EXEC_RESULT=$(aws athena start-query-execution \ --query-string "$QUERY_STRING" \ --result-configuration "OutputLocation=s3://$S3_BUCKET/output/" 2>&1) if echo "$EXEC_RESULT" | grep -i "error"; then handle_error "Failed to execute named query: $EXEC_RESULT" fi QUERY_ID=$(echo "$EXEC_RESULT" | grep -o '"QueryExecutionId": "[^"]*' | cut -d'"' -f4) echo "Named query execution ID: $QUERY_ID" # Wait for named query to complete echo "Waiting for named query execution to complete..." while true; do QUERY_STATUS=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.Status.State" --output text 2>&1) if [ "$QUERY_STATUS" = "SUCCEEDED" ]; then echo "Named query execution completed successfully." break elif [ "$QUERY_STATUS" = "FAILED" ] || [ "$QUERY_STATUS" = "CANCELLED" ]; then handle_error "Named query execution failed with status: $QUERY_STATUS" fi echo "Named query execution in progress, status: $QUERY_STATUS" sleep 2 done # Summary of resources created echo "" echo "===========================================" echo "RESOURCES CREATED" echo "===========================================" echo "- S3 Bucket: $S3_BUCKET" echo "- Database: $DATABASE_NAME" echo "- Table: $TABLE_NAME" echo "- Named Query: $NAMED_QUERY_ID" echo "- Query results saved to: query-results.csv" echo "===========================================" # Prompt for cleanup echo "" echo "===========================================" echo "CLEANUP CONFIRMATION" echo "===========================================" echo "Do you want to clean up all created resources? (y/n): " read -r CLEANUP_CHOICE if [[ "$CLEANUP_CHOICE" =~ ^[Yy]$ ]]; then echo "Starting cleanup..." # Delete named query echo "Deleting named query: $NAMED_QUERY_ID" DELETE_QUERY_RESULT=$(aws athena delete-named-query --named-query-id "$NAMED_QUERY_ID" 2>&1) if echo "$DELETE_QUERY_RESULT" | grep -i "error"; then echo "Warning: Failed to delete named query: $DELETE_QUERY_RESULT" else echo "Named query deleted successfully." fi # Drop table echo "Dropping table: $TABLE_NAME" DROP_TABLE_RESULT=$(aws athena start-query-execution \ --query-string "DROP TABLE IF EXISTS $DATABASE_NAME.$TABLE_NAME" \ --result-configuration "OutputLocation=s3://$S3_BUCKET/output/" 2>&1) if echo "$DROP_TABLE_RESULT" | grep -i "error"; then echo "Warning: Failed to drop table: $DROP_TABLE_RESULT" else QUERY_ID=$(echo "$DROP_TABLE_RESULT" | grep -o '"QueryExecutionId": "[^"]*' | cut -d'"' -f4) echo "Waiting for table deletion to complete..." while true; do QUERY_STATUS=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.Status.State" --output text 2>&1) if [ "$QUERY_STATUS" = "SUCCEEDED" ]; then echo "Table dropped successfully." break elif [ "$QUERY_STATUS" = "FAILED" ] || [ "$QUERY_STATUS" = "CANCELLED" ]; then echo "Warning: Table deletion failed with status: $QUERY_STATUS" break fi echo "Table deletion in progress, status: $QUERY_STATUS" sleep 2 done fi # Drop database echo "Dropping database: $DATABASE_NAME" DROP_DB_RESULT=$(aws athena start-query-execution \ --query-string "DROP DATABASE IF EXISTS $DATABASE_NAME" \ --result-configuration "OutputLocation=s3://$S3_BUCKET/output/" 2>&1) if echo "$DROP_DB_RESULT" | grep -i "error"; then echo "Warning: Failed to drop database: $DROP_DB_RESULT" else QUERY_ID=$(echo "$DROP_DB_RESULT" | grep -o '"QueryExecutionId": "[^"]*' | cut -d'"' -f4) echo "Waiting for database deletion to complete..." while true; do QUERY_STATUS=$(aws athena get-query-execution --query-execution-id "$QUERY_ID" --query "QueryExecution.Status.State" --output text 2>&1) if [ "$QUERY_STATUS" = "SUCCEEDED" ]; then echo "Database dropped successfully." break elif [ "$QUERY_STATUS" = "FAILED" ] || [ "$QUERY_STATUS" = "CANCELLED" ]; then echo "Warning: Database deletion failed with status: $QUERY_STATUS" break fi echo "Database deletion in progress, status: $QUERY_STATUS" sleep 2 done fi # Empty and delete S3 bucket echo "Emptying S3 bucket: $S3_BUCKET" EMPTY_BUCKET_RESULT=$(aws s3 rm "s3://$S3_BUCKET" --recursive 2>&1) if echo "$EMPTY_BUCKET_RESULT" | grep -i "error"; then echo "Warning: Failed to empty S3 bucket: $EMPTY_BUCKET_RESULT" else echo "S3 bucket emptied successfully." fi echo "Deleting S3 bucket: $S3_BUCKET" DELETE_BUCKET_RESULT=$(aws s3 rb "s3://$S3_BUCKET" 2>&1) if echo "$DELETE_BUCKET_RESULT" | grep -i "error"; then echo "Warning: Failed to delete S3 bucket: $DELETE_BUCKET_RESULT" else echo "S3 bucket deleted successfully." fi echo "Cleanup completed." else echo "Cleanup skipped. Resources will remain in your AWS account." fi echo "Tutorial completed successfully!"-
For API details, see the following topics in AWS CLI Command Reference.
-
Get started with tags
Getting started with Amazon EMR