AWS SDK 또는 CLI와 함께 SelectObjectContent 사용 - Amazon Simple Storage Service

AWS SDK 또는 CLI와 함께 SelectObjectContent 사용

다음 코드 예제는 SelectObjectContent의 사용 방법을 보여 줍니다.

CLI
AWS CLI

SQL 문을 기반으로 Amazon S3 객체의 콘텐츠를 필터링하는 방법

다음 select-object-content 예시에서는 지정된 SQL my-data-file.csv 문으로 객체를 필터링하고 출력을 파일로 보냅니다.

aws s3api select-object-content \ --bucket my-bucket \ --key my-data-file.csv \ --expression "select * from s3object limit 100" \ --expression-type 'SQL' \ --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' \ --output-serialization '{"CSV": {}}' "output.csv"

이 명령은 출력을 생성하지 않습니다.

Java
SDK for Java 2.x
참고

GitHub에 더 많은 내용이 있습니다. AWS코드 예시 리포지토리에서 전체 예시를 찾고 설정 및 실행하는 방법을 배워보세요.

다음 예시에서는 JSON 객체를 사용한 쿼리를 보여줍니다. 전체 예시는 CSV 객체의 사용도 보여줍니다.

import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.core.async.AsyncRequestBody; import software.amazon.awssdk.core.async.BlockingInputStreamAsyncRequestBody; import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.model.CSVInput; import software.amazon.awssdk.services.s3.model.CSVOutput; import software.amazon.awssdk.services.s3.model.CompressionType; import software.amazon.awssdk.services.s3.model.ExpressionType; import software.amazon.awssdk.services.s3.model.FileHeaderInfo; import software.amazon.awssdk.services.s3.model.InputSerialization; import software.amazon.awssdk.services.s3.model.JSONInput; import software.amazon.awssdk.services.s3.model.JSONOutput; import software.amazon.awssdk.services.s3.model.JSONType; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.OutputSerialization; import software.amazon.awssdk.services.s3.model.Progress; import software.amazon.awssdk.services.s3.model.PutObjectResponse; import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import software.amazon.awssdk.services.s3.model.Stats; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.UUID; import java.util.concurrent.CompletableFuture; public class SelectObjectContentExample { static final Logger logger = LoggerFactory.getLogger(SelectObjectContentExample.class); static final String BUCKET_NAME = "select-object-content-" + UUID.randomUUID(); static final S3AsyncClient s3AsyncClient = S3AsyncClient.create(); static String FILE_CSV = "csv"; static String FILE_JSON = "json"; static String URL_CSV = "https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.csv"; static String URL_JSON = "https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.json"; public static void main(String[] args) { SelectObjectContentExample selectObjectContentExample = new SelectObjectContentExample(); try { SelectObjectContentExample.setUp(); selectObjectContentExample.runSelectObjectContentMethodForJSON(); selectObjectContentExample.runSelectObjectContentMethodForCSV(); } catch (SdkException e) { logger.error(e.getMessage(), e); System.exit(1); } finally { SelectObjectContentExample.tearDown(); } } EventStreamInfo runSelectObjectContentMethodForJSON() { // Set up request parameters. final String queryExpression = "select * from s3object[*][*] c where c.area < 350000"; final String fileType = FILE_JSON; InputSerialization inputSerialization = InputSerialization.builder() .json(JSONInput.builder().type(JSONType.DOCUMENT).build()) .compressionType(CompressionType.NONE) .build(); OutputSerialization outputSerialization = OutputSerialization.builder() .json(JSONOutput.builder().recordDelimiter(null).build()) .build(); // Build the SelectObjectContentRequest. SelectObjectContentRequest select = SelectObjectContentRequest.builder() .bucket(BUCKET_NAME) .key(FILE_JSON) .expression(queryExpression) .expressionType(ExpressionType.SQL) .inputSerialization(inputSerialization) .outputSerialization(outputSerialization) .build(); EventStreamInfo eventStreamInfo = new EventStreamInfo(); // Call the selectObjectContent method with the request and a response handler. // Supply an EventStreamInfo object to the response handler to gather records and information from the response. s3AsyncClient.selectObjectContent(select, buildResponseHandler(eventStreamInfo)).join(); // Log out information gathered while processing the response stream. long recordCount = eventStreamInfo.getRecords().stream().mapToInt(record -> record.split("\n").length ).sum(); logger.info("Total records {}: {}", fileType, recordCount); logger.info("Visitor onRecords for fileType {} called {} times", fileType, eventStreamInfo.getCountOnRecordsCalled()); logger.info("Visitor onStats for fileType {}, {}", fileType, eventStreamInfo.getStats()); logger.info("Visitor onContinuations for fileType {}, {}", fileType, eventStreamInfo.getCountContinuationEvents()); return eventStreamInfo; } static SelectObjectContentResponseHandler buildResponseHandler(EventStreamInfo eventStreamInfo) { // Use a Visitor to process the response stream. This visitor logs information and gathers details while processing. final SelectObjectContentResponseHandler.Visitor visitor = SelectObjectContentResponseHandler.Visitor.builder() .onRecords(r -> { logger.info("Record event received."); eventStreamInfo.addRecord(r.payload().asUtf8String()); eventStreamInfo.incrementOnRecordsCalled(); }) .onCont(ce -> { logger.info("Continuation event received."); eventStreamInfo.incrementContinuationEvents(); }) .onProgress(pe -> { Progress progress = pe.details(); logger.info("Progress event received:\n bytesScanned:{}\nbytesProcessed: {}\nbytesReturned:{}", progress.bytesScanned(), progress.bytesProcessed(), progress.bytesReturned()); }) .onEnd(ee -> logger.info("End event received.")) .onStats(se -> { logger.info("Stats event received."); eventStreamInfo.addStats(se.details()); }) .build(); // Build the SelectObjectContentResponseHandler with the visitor that processes the stream. return SelectObjectContentResponseHandler.builder() .subscriber(visitor).build(); } // The EventStreamInfo class is used to store information gathered while processing the response stream. static class EventStreamInfo { private final List<String> records = new ArrayList<>(); private Integer countOnRecordsCalled = 0; private Integer countContinuationEvents = 0; private Stats stats; void incrementOnRecordsCalled() { countOnRecordsCalled++; } void incrementContinuationEvents() { countContinuationEvents++; } void addRecord(String record) { records.add(record); } void addStats(Stats stats) { this.stats = stats; } public List<String> getRecords() { return records; } public Integer getCountOnRecordsCalled() { return countOnRecordsCalled; } public Integer getCountContinuationEvents() { return countContinuationEvents; } public Stats getStats() { return stats; } }
  • API 세부 정보는 AWS SDK for Java 2.x API 참조의 SelectObjectContent를 참조하세요.

AWS SDK 개발자 가이드 및 코드 예시의 전체 목록은 AWS SDK와 함께 이 서비스 사용 단원을 참조하세요. 이 주제에는 시작하기에 대한 정보와 이전 SDK 버전에 대한 세부 정보도 포함되어 있습니다.