an AWS SDK 또는 CLISelectObjectContent와 함께 사용 - AWS SDK 코드 예제

AWS Doc SDK ExamplesWord AWS SDK 리포지토리에는 더 많은 GitHub 예제가 있습니다.

기계 번역으로 제공되는 번역입니다. 제공된 번역과 원본 영어의 내용이 상충하는 경우에는 영어 버전이 우선합니다.

an AWS SDK 또는 CLISelectObjectContent와 함께 사용

다음 코드 예제는 SelectObjectContent의 사용 방법을 보여 줍니다.

CLI
AWS CLI

SQL 문을 기반으로 Amazon S3 객체의 콘텐츠를 필터링하려면

다음 select-object-content 예제에서는 지정된 SQL 문my-data-file.csv으로 객체를 필터링하고 출력을 파일로 전송합니다.

aws s3api select-object-content \ --bucket my-bucket \ --key my-data-file.csv \ --expression "select * from s3object limit 100" \ --expression-type 'SQL' \ --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' \ --output-serialization '{"CSV": {}}' "output.csv"

이 명령은 출력을 생성하지 않습니다.

Java
Java 2.x용 SDK
참고

더 많은 on GitHub가 있습니다. AWS 코드 예시 리포지토리에서 전체 예시를 찾고 설정 및 실행하는 방법을 배워보세요.

다음 예제에서는 JSON 객체를 사용한 쿼리를 보여줍니다. 전체 예제에서는 CSV 객체의 사용도 보여줍니다.

import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.core.async.AsyncRequestBody; import software.amazon.awssdk.core.async.BlockingInputStreamAsyncRequestBody; import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.model.CSVInput; import software.amazon.awssdk.services.s3.model.CSVOutput; import software.amazon.awssdk.services.s3.model.CompressionType; import software.amazon.awssdk.services.s3.model.ExpressionType; import software.amazon.awssdk.services.s3.model.FileHeaderInfo; import software.amazon.awssdk.services.s3.model.InputSerialization; import software.amazon.awssdk.services.s3.model.JSONInput; import software.amazon.awssdk.services.s3.model.JSONOutput; import software.amazon.awssdk.services.s3.model.JSONType; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.OutputSerialization; import software.amazon.awssdk.services.s3.model.Progress; import software.amazon.awssdk.services.s3.model.PutObjectResponse; import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import software.amazon.awssdk.services.s3.model.Stats; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.UUID; import java.util.concurrent.CompletableFuture; public class SelectObjectContentExample { static final Logger logger = LoggerFactory.getLogger(SelectObjectContentExample.class); static final String BUCKET_NAME = "amzn-s3-demo-bucket-" + UUID.randomUUID(); static final S3AsyncClient s3AsyncClient = S3AsyncClient.create(); static String FILE_CSV = "csv"; static String FILE_JSON = "json"; static String URL_CSV = "https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.csv"; static String URL_JSON = "https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.json"; public static void main(String[] args) { SelectObjectContentExample selectObjectContentExample = new SelectObjectContentExample(); try { SelectObjectContentExample.setUp(); selectObjectContentExample.runSelectObjectContentMethodForJSON(); selectObjectContentExample.runSelectObjectContentMethodForCSV(); } catch (SdkException e) { logger.error(e.getMessage(), e); System.exit(1); } finally { SelectObjectContentExample.tearDown(); } } EventStreamInfo runSelectObjectContentMethodForJSON() { // Set up request parameters. final String queryExpression = "select * from s3object[*][*] c where c.area < 350000"; final String fileType = FILE_JSON; InputSerialization inputSerialization = InputSerialization.builder() .json(JSONInput.builder().type(JSONType.DOCUMENT).build()) .compressionType(CompressionType.NONE) .build(); OutputSerialization outputSerialization = OutputSerialization.builder() .json(JSONOutput.builder().recordDelimiter(null).build()) .build(); // Build the SelectObjectContentRequest. SelectObjectContentRequest select = SelectObjectContentRequest.builder() .bucket(BUCKET_NAME) .key(FILE_JSON) .expression(queryExpression) .expressionType(ExpressionType.SQL) .inputSerialization(inputSerialization) .outputSerialization(outputSerialization) .build(); EventStreamInfo eventStreamInfo = new EventStreamInfo(); // Call the selectObjectContent method with the request and a response handler. // Supply an EventStreamInfo object to the response handler to gather records and information from the response. s3AsyncClient.selectObjectContent(select, buildResponseHandler(eventStreamInfo)).join(); // Log out information gathered while processing the response stream. long recordCount = eventStreamInfo.getRecords().stream().mapToInt(record -> record.split("\n").length ).sum(); logger.info("Total records {}: {}", fileType, recordCount); logger.info("Visitor onRecords for fileType {} called {} times", fileType, eventStreamInfo.getCountOnRecordsCalled()); logger.info("Visitor onStats for fileType {}, {}", fileType, eventStreamInfo.getStats()); logger.info("Visitor onContinuations for fileType {}, {}", fileType, eventStreamInfo.getCountContinuationEvents()); return eventStreamInfo; } static SelectObjectContentResponseHandler buildResponseHandler(EventStreamInfo eventStreamInfo) { // Use a Visitor to process the response stream. This visitor logs information and gathers details while processing. final SelectObjectContentResponseHandler.Visitor visitor = SelectObjectContentResponseHandler.Visitor.builder() .onRecords(r -> { logger.info("Record event received."); eventStreamInfo.addRecord(r.payload().asUtf8String()); eventStreamInfo.incrementOnRecordsCalled(); }) .onCont(ce -> { logger.info("Continuation event received."); eventStreamInfo.incrementContinuationEvents(); }) .onProgress(pe -> { Progress progress = pe.details(); logger.info("Progress event received:\n bytesScanned:{}\nbytesProcessed: {}\nbytesReturned:{}", progress.bytesScanned(), progress.bytesProcessed(), progress.bytesReturned()); }) .onEnd(ee -> logger.info("End event received.")) .onStats(se -> { logger.info("Stats event received."); eventStreamInfo.addStats(se.details()); }) .build(); // Build the SelectObjectContentResponseHandler with the visitor that processes the stream. return SelectObjectContentResponseHandler.builder() .subscriber(visitor).build(); } // The EventStreamInfo class is used to store information gathered while processing the response stream. static class EventStreamInfo { private final List<String> records = new ArrayList<>(); private Integer countOnRecordsCalled = 0; private Integer countContinuationEvents = 0; private Stats stats; void incrementOnRecordsCalled() { countOnRecordsCalled++; } void incrementContinuationEvents() { countContinuationEvents++; } void addRecord(String record) { records.add(record); } void addStats(Stats stats) { this.stats = stats; } public List<String> getRecords() { return records; } public Integer getCountOnRecordsCalled() { return countOnRecordsCalled; } public Integer getCountContinuationEvents() { return countContinuationEvents; } public Stats getStats() { return stats; } }
  • API 세부 정보는 SelectObjectContent AWS SDK for Java 2.x 참조의 API를 참조하세요.