AWS Glue 用SDK于 Kotlin 的示例 - AWS SDK 代码示例

AWS 文档 AWS SDK示例 GitHub 存储库中还有更多SDK示例

本文属于机器翻译版本。若本译文内容与英语原文存在差异,则一律以英文原文为准。

AWS Glue 用SDK于 Kotlin 的示例

以下代码示例向您展示了如何使用 for Kotlin 来执行操作和实现常见场景。 AWS SDK AWS Glue

基础知识是向您展示如何在服务中执行基本操作的代码示例。

操作是大型程序的代码摘录,必须在上下文中运行。您可以通过操作了解如何调用单个服务函数,还可以通过函数相关场景的上下文查看操作。

每个示例都包含一个指向完整源代码的链接,您可以在其中找到有关如何在上下文中设置和运行代码的说明。

基本功能

以下代码示例展示了如何:

  • 创建一个爬虫来抓取公有 Amazon S3 存储桶并生成格式CSV元数据的数据库。

  • 列出您的中的数据库和表的相关信息 AWS Glue Data Catalog。

  • 创建任务以从 S3 存储桶中提取CSV数据、转换数据,并将JSON格式化的输出加载到另一个 S3 存储桶中。

  • 列出有关作业运行的信息,查看转换后的数据,并清除资源。

有关更多信息,请参阅教程: AWS Glue Studio 入门

SDK对于 Kotlin 来说
注意

还有更多相关信息 GitHub。查找完整示例,学习如何在 AWS 代码示例存储库中进行设置和运行。

suspend fun main(args: Array<String>) { val usage = """ Usage: <iam> <s3Path> <cron> <dbName> <crawlerName> <jobName> <scriptLocation> <locationUri> Where: iam - The Amazon Resource Name (ARN) of the AWS Identity and Access Management (IAM) role that has AWS Glue and Amazon Simple Storage Service (Amazon S3) permissions. s3Path - The Amazon Simple Storage Service (Amazon S3) target that contains data (for example, CSV data). cron - A cron expression used to specify the schedule (for example, cron(15 12 * * ? *). dbName - The database name. crawlerName - The name of the crawler. jobName - The name you assign to this job definition. scriptLocation - Specifies the Amazon S3 path to a script that runs a job. locationUri - Specifies the location of the database """ if (args.size != 8) { println(usage) exitProcess(1) } val iam = args[0] val s3Path = args[1] val cron = args[2] val dbName = args[3] val crawlerName = args[4] val jobName = args[5] val scriptLocation = args[6] val locationUri = args[7] println("About to start the AWS Glue Scenario") createDatabase(dbName, locationUri) createCrawler(iam, s3Path, cron, dbName, crawlerName) getCrawler(crawlerName) startCrawler(crawlerName) getDatabase(dbName) getGlueTables(dbName) createJob(jobName, iam, scriptLocation) startJob(jobName) getJobs() getJobRuns(jobName) deleteJob(jobName) println("*** Wait for 5 MIN so the $crawlerName is ready to be deleted") TimeUnit.MINUTES.sleep(5) deleteMyDatabase(dbName) deleteCrawler(crawlerName) } suspend fun createDatabase( dbName: String?, locationUriVal: String?, ) { val input = DatabaseInput { description = "Built with the AWS SDK for Kotlin" name = dbName locationUri = locationUriVal } val request = CreateDatabaseRequest { databaseInput = input } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.createDatabase(request) println("The database was successfully created") } } suspend fun createCrawler( iam: String?, s3Path: String?, cron: String?, dbName: String?, crawlerName: String, ) { val s3Target = S3Target { path = s3Path } val targetList = ArrayList<S3Target>() targetList.add(s3Target) val targetOb = CrawlerTargets { s3Targets = targetList } val crawlerRequest = CreateCrawlerRequest { databaseName = dbName name = crawlerName description = "Created by the AWS Glue Java API" targets = targetOb role = iam schedule = cron } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.createCrawler(crawlerRequest) println("$crawlerName was successfully created") } } suspend fun getCrawler(crawlerName: String?) { val request = GetCrawlerRequest { name = crawlerName } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getCrawler(request) val role = response.crawler?.role println("The role associated with this crawler is $role") } } suspend fun startCrawler(crawlerName: String) { val crawlerRequest = StartCrawlerRequest { name = crawlerName } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.startCrawler(crawlerRequest) println("$crawlerName was successfully started.") } } suspend fun getDatabase(databaseName: String?) { val request = GetDatabaseRequest { name = databaseName } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getDatabase(request) val dbDesc = response.database?.description println("The database description is $dbDesc") } } suspend fun getGlueTables(dbName: String?) { val tableRequest = GetTablesRequest { databaseName = dbName } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getTables(tableRequest) response.tableList?.forEach { tableName -> println("Table name is ${tableName.name}") } } } suspend fun startJob(jobNameVal: String?) { val runRequest = StartJobRunRequest { workerType = WorkerType.G1X numberOfWorkers = 10 jobName = jobNameVal } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.startJobRun(runRequest) println("The job run Id is ${response.jobRunId}") } } suspend fun createJob( jobName: String, iam: String?, scriptLocationVal: String?, ) { val commandOb = JobCommand { pythonVersion = "3" name = "MyJob1" scriptLocation = scriptLocationVal } val jobRequest = CreateJobRequest { description = "A Job created by using the AWS SDK for Java V2" glueVersion = "2.0" workerType = WorkerType.G1X numberOfWorkers = 10 name = jobName role = iam command = commandOb } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.createJob(jobRequest) println("$jobName was successfully created.") } } suspend fun getJobs() { val request = GetJobsRequest { maxResults = 10 } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getJobs(request) response.jobs?.forEach { job -> println("Job name is ${job.name}") } } } suspend fun getJobRuns(jobNameVal: String?) { val request = GetJobRunsRequest { jobName = jobNameVal } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getJobRuns(request) response.jobRuns?.forEach { job -> println("Job name is ${job.jobName}") } } } suspend fun deleteJob(jobNameVal: String) { val jobRequest = DeleteJobRequest { jobName = jobNameVal } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.deleteJob(jobRequest) println("$jobNameVal was successfully deleted") } } suspend fun deleteMyDatabase(databaseName: String) { val request = DeleteDatabaseRequest { name = databaseName } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.deleteDatabase(request) println("$databaseName was successfully deleted") } } suspend fun deleteCrawler(crawlerName: String) { val request = DeleteCrawlerRequest { name = crawlerName } GlueClient { region = "us-east-1" }.use { glueClient -> glueClient.deleteCrawler(request) println("$crawlerName was deleted") } }

操作

以下代码示例演示如何使用 CreateCrawler

SDK对于 Kotlin 来说
注意

还有更多相关信息 GitHub。查找完整示例,学习如何在 AWS 代码示例存储库中进行设置和运行。

suspend fun createGlueCrawler( iam: String?, s3Path: String?, cron: String?, dbName: String?, crawlerName: String, ) { val s3Target = S3Target { path = s3Path } // Add the S3Target to a list. val targetList = mutableListOf<S3Target>() targetList.add(s3Target) val targetOb = CrawlerTargets { s3Targets = targetList } val request = CreateCrawlerRequest { databaseName = dbName name = crawlerName description = "Created by the AWS Glue Kotlin API" targets = targetOb role = iam schedule = cron } GlueClient { region = "us-west-2" }.use { glueClient -> glueClient.createCrawler(request) println("$crawlerName was successfully created") } }
  • 有关API详细信息,请参阅CreateCrawler中的 Kotlin AWS SDK API 参考

以下代码示例演示如何使用 GetCrawler

SDK对于 Kotlin 来说
注意

还有更多相关信息 GitHub。查找完整示例,学习如何在 AWS 代码示例存储库中进行设置和运行。

suspend fun getSpecificCrawler(crawlerName: String?) { val request = GetCrawlerRequest { name = crawlerName } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getCrawler(request) val role = response.crawler?.role println("The role associated with this crawler is $role") } }
  • 有关API详细信息,请参阅GetCrawler中的 Kotlin AWS SDK API 参考

以下代码示例演示如何使用 GetDatabase

SDK对于 Kotlin 来说
注意

还有更多相关信息 GitHub。查找完整示例,学习如何在 AWS 代码示例存储库中进行设置和运行。

suspend fun getSpecificDatabase(databaseName: String?) { val request = GetDatabaseRequest { name = databaseName } GlueClient { region = "us-east-1" }.use { glueClient -> val response = glueClient.getDatabase(request) val dbDesc = response.database?.description println("The database description is $dbDesc") } }
  • 有关API详细信息,请参阅GetDatabase中的 Kotlin AWS SDK API 参考

以下代码示例演示如何使用 StartCrawler

SDK对于 Kotlin 来说
注意

还有更多相关信息 GitHub。查找完整示例,学习如何在 AWS 代码示例存储库中进行设置和运行。

suspend fun startSpecificCrawler(crawlerName: String?) { val request = StartCrawlerRequest { name = crawlerName } GlueClient { region = "us-west-2" }.use { glueClient -> glueClient.startCrawler(request) println("$crawlerName was successfully started.") } }
  • 有关API详细信息,请参阅StartCrawler中的 Kotlin AWS SDK API 参考