本文属于机器翻译版本。若本译文内容与英语原文存在差异,则一律以英文原文为准。
检测存储视频中的文本
存储视频中的 Amazon Rekognition Video 文本检测是一个异步操作。要开始检测文本,请调用 StartTextDetection。Amazon Rekognition Video 会将视频分析的完成状态发布到 Amazon SNS 主题。如果视频分析成功,请调用 GetTextDetection 获取分析结果。有关启动视频分析和获取结果的详细信息,请参阅调用 Amazon Rekognition Video 操作。
此过程扩展了使用 Java 或 Python 分析存储在亚马逊 S3 存储桶中的视频 (SDK)中的代码。它使用 Amazon SQS 队列获取视频分析请求的完成状态。
检测存储在 Amazon S3 存储桶内的视频中的文本 (SDK)
-
按照使用 Java 或 Python 分析存储在亚马逊 S3 存储桶中的视频 (SDK)中的步骤操作。
-
将以下代码添加到步骤 1 中的 VideoDetect
类。
- Java
-
//Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//SPDX-License-Identifier: MIT-0 (For details, see https://github.com/awsdocs/amazon-rekognition-developer-guide/blob/master/LICENSE-SAMPLECODE.)
/**
 * Starts an asynchronous text detection job for a video stored in Amazon S3.
 *
 * The job id returned by the service is stored in {@code startJobId} so that the
 * results can be fetched later.
 *
 * @param bucket name of the S3 bucket that holds the video
 * @param video  name (key) of the video object inside the bucket
 * @throws Exception if the request to Amazon Rekognition Video fails
 */
private static void StartTextDetection(String bucket, String video) throws Exception{
    // Where Amazon Rekognition Video publishes the completion status of the job.
    NotificationChannel completionChannel = new NotificationChannel()
            .withSNSTopicArn(snsTopicArn)
            .withRoleArn(roleArn);

    // Location of the stored video to analyze.
    S3Object videoObject = new S3Object()
            .withBucket(bucket)
            .withName(video);
    Video storedVideo = new Video().withS3Object(videoObject);

    StartTextDetectionRequest request = new StartTextDetectionRequest()
            .withVideo(storedVideo)
            .withNotificationChannel(completionChannel);

    startJobId = rek.startTextDetection(request).getJobId();
}
/**
 * Prints the results of the text detection job identified by {@code startJobId}.
 *
 * Results are requested ten at a time; the method keeps requesting pages until the
 * service no longer returns a pagination token. The video metadata is printed once
 * per page, followed by every text detection on that page.
 *
 * @throws Exception if a request to Amazon Rekognition Video fails
 */
private static void GetTextDetectionResults() throws Exception{
    final int pageSize = 10;
    String nextToken = null;

    do {
        GetTextDetectionRequest pageRequest = new GetTextDetectionRequest()
                .withJobId(startJobId)
                .withNextToken(nextToken)
                .withMaxResults(pageSize);
        GetTextDetectionResult page = rek.getTextDetection(pageRequest);

        VideoMetadata metadata = page.getVideoMetadata();
        System.out.println("Format: " + metadata.getFormat());
        System.out.println("Codec: " + metadata.getCodec());
        System.out.println("Duration: " + metadata.getDurationMillis());
        System.out.println("FrameRate: " + metadata.getFrameRate());

        // Show text, confidence values
        for (TextDetectionResult entry : page.getTextDetections()) {
            // Timestamps are reported in milliseconds; print whole seconds.
            long seconds = entry.getTimestamp() / 1000;
            System.out.println("Sec: " + Long.toString(seconds) + " ");

            TextDetection found = entry.getTextDetection();
            System.out.println("Text Detected: " + found.getDetectedText());
            System.out.println("Confidence: " + found.getConfidence().toString());
            System.out.println("Id : " + found.getId());
            System.out.println("Parent Id: " + found.getParentId());
            System.out.println("Bounding Box" + found.getGeometry().getBoundingBox().toString());
            System.out.println("Type: " + found.getType());
            System.out.println();
        }

        // A null token means the page just printed was the last one.
        nextToken = page.getNextToken();
    } while (nextToken != null);
}
在函数 main
中,将以下行:
StartLabelDetection(bucket, video);
if (GetSQSMessageSuccess()==true)
GetLabelDetectionResults();
替换为:
StartTextDetection(bucket, video);
if (GetSQSMessageSuccess()==true)
GetTextDetectionResults();
- Java V2
-
此代码取自 AWS 文档 SDK 示例 GitHub 存储库。请在此处查看完整示例。
//snippet-start:[rekognition.java2.recognize_video_text.import]
import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.rekognition.RekognitionClient;
import software.amazon.awssdk.services.rekognition.model.S3Object;
import software.amazon.awssdk.services.rekognition.model.NotificationChannel;
import software.amazon.awssdk.services.rekognition.model.Video;
import software.amazon.awssdk.services.rekognition.model.StartTextDetectionRequest;
import software.amazon.awssdk.services.rekognition.model.StartTextDetectionResponse;
import software.amazon.awssdk.services.rekognition.model.RekognitionException;
import software.amazon.awssdk.services.rekognition.model.GetTextDetectionResponse;
import software.amazon.awssdk.services.rekognition.model.GetTextDetectionRequest;
import software.amazon.awssdk.services.rekognition.model.VideoMetadata;
import software.amazon.awssdk.services.rekognition.model.TextDetectionResult;
import java.util.List;
//snippet-end:[rekognition.java2.recognize_video_text.import]
/**
 * Detects text in a video stored in an Amazon S3 bucket by using Amazon Rekognition Video.
 *
 * The example starts an asynchronous text detection job, polls the job until it
 * succeeds or fails, and then prints every page of results.
 *
 * Before running this Java V2 code example, set up your development environment, including your credentials.
 *
 * For more information, see the following documentation topic:
 *
 * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html
 */
public class DetectTextVideo {

    /** Pause between two job status checks, in milliseconds. */
    private static final long POLL_INTERVAL_MILLIS = 1000;

    /** Identifier of the job started by startTextLabels; read by GetTextResults. */
    private static String startJobId = "";

    /**
     * Entry point.
     *
     * @param args bucket name, video name, SNS topic ARN and IAM role ARN, in that order
     */
    public static void main(String[] args) {
        final String usage = "\n" +
            "Usage: " +
            "   <bucket> <video> <topicArn> <roleArn>\n\n" +
            "Where:\n" +
            "   bucket - The name of the bucket in which the video is located (for example, myBucket). \n\n" +
            "   video - The name of video (for example, people.mp4). \n\n" +
            "   topicArn - The ARN of the Amazon Simple Notification Service (Amazon SNS) topic. \n\n" +
            "   roleArn - The ARN of the AWS Identity and Access Management (IAM) role to use. \n\n";

        if (args.length != 4) {
            System.out.println(usage);
            System.exit(1);
        }

        String bucket = args[0];
        String video = args[1];
        String topicArn = args[2];
        String roleArn = args[3];

        Region region = Region.US_EAST_1;

        // try-with-resources closes the client even if an unexpected exception escapes.
        try (RekognitionClient rekClient = RekognitionClient.builder()
                .region(region)
                .credentialsProvider(ProfileCredentialsProvider.create("profile-name"))
                .build()) {

            NotificationChannel channel = NotificationChannel.builder()
                .snsTopicArn(topicArn)
                .roleArn(roleArn)
                .build();

            startTextLabels(rekClient, channel, bucket, video);
            GetTextResults(rekClient);
            System.out.println("This example is done!");
        }
    }

    // snippet-start:[rekognition.java2.recognize_video_text.main]
    /**
     * Starts a text detection job for a video stored in Amazon S3 and remembers its job id.
     *
     * Prints the error message and exits with status 1 if the service rejects the request.
     *
     * @param rekClient client used to call Amazon Rekognition
     * @param channel   SNS topic and IAM role used for the completion notification
     * @param bucket    name of the S3 bucket that holds the video
     * @param video     name of the video object inside the bucket
     */
    public static void startTextLabels(RekognitionClient rekClient,
                                       NotificationChannel channel,
                                       String bucket,
                                       String video) {
        try {
            S3Object s3Obj = S3Object.builder()
                .bucket(bucket)
                .name(video)
                .build();

            Video vidOb = Video.builder()
                .s3Object(s3Obj)
                .build();

            StartTextDetectionRequest textDetectionRequest = StartTextDetectionRequest.builder()
                .jobTag("DetectingLabels")
                .notificationChannel(channel)
                .video(vidOb)
                .build();

            StartTextDetectionResponse textDetectionResponse = rekClient.startTextDetection(textDetectionRequest);
            startJobId = textDetectionResponse.jobId();

        } catch (RekognitionException e) {
            System.out.println(e.getMessage());
            System.exit(1);
        }
    }

    /**
     * Waits for the text detection job to finish and prints every page of its results.
     *
     * Prints the error message and exits with status 1 if a request fails, if the job
     * reports the FAILED status, or if the waiting thread is interrupted.
     *
     * @param rekClient client used to call Amazon Rekognition
     */
    public static void GetTextResults(RekognitionClient rekClient) {
        try {
            String paginationToken = null;

            do {
                GetTextDetectionRequest recognitionRequest = GetTextDetectionRequest.builder()
                    .jobId(startJobId)
                    .nextToken(paginationToken)
                    .maxResults(10)
                    .build();

                // Proceed when the job is done - otherwise VideoMetadata is null.
                GetTextDetectionResponse textDetectionResponse =
                    waitForSucceededPage(rekClient, recognitionRequest);

                VideoMetadata videoMetaData = textDetectionResponse.videoMetadata();
                System.out.println("Format: " + videoMetaData.format());
                System.out.println("Codec: " + videoMetaData.codec());
                System.out.println("Duration: " + videoMetaData.durationMillis());
                System.out.println("FrameRate: " + videoMetaData.frameRate());
                System.out.println("Job");

                List<TextDetectionResult> textDetections = textDetectionResponse.textDetections();
                for (TextDetectionResult detectedText : textDetections) {
                    System.out.println("Confidence: " + detectedText.textDetection().confidence().toString());
                    System.out.println("Id : " + detectedText.textDetection().id());
                    System.out.println("Parent Id: " + detectedText.textDetection().parentId());
                    System.out.println("Type: " + detectedText.textDetection().type());
                    System.out.println("Text: " + detectedText.textDetection().detectedText());
                    System.out.println();
                }

                // A null token means the page just printed was the last one.
                paginationToken = textDetectionResponse.nextToken();
            } while (paginationToken != null);

        } catch (RekognitionException | IllegalStateException e) {
            System.out.println(e.getMessage());
            System.exit(1);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            System.out.println(e.getMessage());
            System.exit(1);
        }
    }

    /**
     * Repeats the given request until the job reports SUCCEEDED and returns that response.
     *
     * @param rekClient client used to call Amazon Rekognition
     * @param request   request for one page of text detection results
     * @return the first response whose job status is SUCCEEDED
     * @throws IllegalStateException if the job reports the FAILED status; without this
     *                               check a failed job would be polled forever
     * @throws InterruptedException  if the thread is interrupted while waiting
     */
    private static GetTextDetectionResponse waitForSucceededPage(RekognitionClient rekClient,
                                                                 GetTextDetectionRequest request)
            throws InterruptedException {
        int attempt = 0;
        while (true) {
            GetTextDetectionResponse response = rekClient.getTextDetection(request);
            String status = response.jobStatusAsString();

            if ("SUCCEEDED".equals(status)) {
                return response;
            }
            if ("FAILED".equals(status)) {
                throw new IllegalStateException(
                    "Text detection job " + startJobId + " failed: " + response.statusMessage());
            }

            System.out.println(attempt + " status is: " + status);
            Thread.sleep(POLL_INTERVAL_MILLIS);
            attempt++;
        }
    }
    // snippet-end:[rekognition.java2.recognize_video_text.main]
}
- Python
-
#Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#SPDX-License-Identifier: MIT-0 (For details, see https://github.com/awsdocs/amazon-rekognition-developer-guide/blob/master/LICENSE-SAMPLECODE.)
def StartTextDetection(self):
    """Start a text detection job for the stored video and remember its job id.

    Reads self.bucket, self.video, self.roleArn and self.snsTopicArn, calls
    start_text_detection on self.rek, stores the returned job id in
    self.startJobId and prints it.
    """
    video_location = {'S3Object': {'Bucket': self.bucket, 'Name': self.video}}
    completion_channel = {'RoleArn': self.roleArn, 'SNSTopicArn': self.snsTopicArn}

    job = self.rek.start_text_detection(Video=video_location,
                                        NotificationChannel=completion_channel)

    self.startJobId = job['JobId']
    print('Start Job Id: ' + self.startJobId)
def GetTextDetectionResults(self):
    """Print every page of results for the job stored in self.startJobId.

    Pages of up to ten detections are requested from self.rek until a response
    no longer contains a 'NextToken' key. For each page the video metadata is
    printed first, followed by each detected text with its timestamp,
    confidence, bounding box and type.
    """
    page_size = 10
    next_token = ''

    while True:
        page = self.rek.get_text_detection(JobId=self.startJobId,
                                           MaxResults=page_size,
                                           NextToken=next_token)

        metadata = page['VideoMetadata']
        print('Codec: ' + metadata['Codec'])
        print('Duration: ' + str(metadata['DurationMillis']))
        print('Format: ' + metadata['Format'])
        print('Frame rate: ' + str(metadata['FrameRate']))
        print()

        for detection in page['TextDetections']:
            found = detection['TextDetection']
            print("Timestamp: " + str(detection['Timestamp']))
            print(" Text Detected: " + found['DetectedText'])
            print(" Confidence: " + str(found['Confidence']))
            print(" Bounding box")
            print(" Top: " + str(found['Geometry']['BoundingBox']['Top']))
            print(" Left: " + str(found['Geometry']['BoundingBox']['Left']))
            print(" Width: " + str(found['Geometry']['BoundingBox']['Width']))
            print(" Height: " + str(found['Geometry']['BoundingBox']['Height']))
            print(" Type: " + str(found['Type']))
            print()

        # No token in the response means this was the last page.
        if 'NextToken' not in page:
            break
        next_token = page['NextToken']
在函数 main
中,将以下行:
analyzer.StartLabelDetection()
if analyzer.GetSQSMessageSuccess()==True:
analyzer.GetLabelDetectionResults()
替换为:
analyzer.StartTextDetection()
if analyzer.GetSQSMessageSuccess()==True:
analyzer.GetTextDetectionResults()
- CLI
-
运行以下 AWS CLI 命令开始检测视频中的文本。
aws rekognition start-text-detection --video '{"S3Object":{"Bucket":"bucket-name","Name":"video-name"}}' \
--notification-channel '{"SNSTopicArn":"topic-arn","RoleArn":"role-arn"}' \
--region region-name --profile profile-name
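更新以下值:将 bucket-name 和 video-name 替换为 Amazon S3 存储桶名称和视频文件名;将 topic-arn 和 role-arn 替换为 Amazon SNS 主题的 ARN 和 IAM 角色的 ARN;将 region-name 替换为您使用的 AWS 区域;将 profile-name 替换为您的开发人员配置文件名称。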
如果您在 Windows 设备上访问 CLI,请使用双引号代替单引号,并用反斜杠(即 \)对内部双引号进行转义,以解决可能遇到的任何解析器错误。有关示例,请参阅以下内容:
aws rekognition start-text-detection --video \
"{\"S3Object\":{\"Bucket\":\"bucket-name\",\"Name\":\"video-name\"}}" \
--notification-channel "{\"SNSTopicArn\":\"topic-arn\",\"RoleArn\":\"role-arn\"}" \
--region region-name --profile profile-name
运行上述代码示例后,复制返回的 jobID
并将其提供给以下 GetTextDetection
命令以获取结果,将 job-id-number
替换为您之前收到的 jobID
:
aws rekognition get-text-detection --job-id job-id-number --profile profile-name
-
运行该代码。在视频中检测到的文本将显示在列表中。
筛选条件
筛选器是可选的请求参数,可供您在调用 StartTextDetection
时使用。通过按文本区域、大小和置信度评分进行筛选,您可以更加灵活地控制文本检测输出。通过使用感兴趣的区域,您可以轻松将文本检测范围限制为相关的区域,例如,画面底部三分之一的区域,或者足球比赛中显示记分板的左上角。单词边界框大小筛选器可用于避免噪点或不相关的小背景文本。最后,通过单词置信度筛选器,您可以移除因模糊或脏污导致的不可靠的结果。
有关筛选值的信息,请参阅 DetectTextFilters
。
您可以使用以下筛选器:
-
MinConfidence— 设置字词检测的置信度。从结果中排除检测置信度低于此级别的单词。值应介于 0 和 100 之间。
-
MinBoundingBoxWidth— 设置单词边界框的最小宽度。将从结果中排除其边界框小于此值的单词。该值是相对于视频帧宽度的。
-
MinBoundingBoxHeight— 设置单词边框的最小高度。将从结果中排除其边界框高度小于此值的单词。该值是相对于视频帧高度的。
-
RegionsOfInterest— 将检测范围限制在帧的特定区域。这些值是相对于帧尺寸的。对于仅部分位于区域内的对象,响应是不确定的。
GetTextDetection 响应
GetTextDetection
将返回一个数组 (TextDetections
),其中包含有关在视频中检测到的文本的信息。每当在视频中检测到单词或行时,都会存在一个数组元素 TextDetectionResult。数组元素按时间(从视频开始起计时,以毫秒为单位)进行排序。
以下是来自 GetTextDetection
的部分 JSON 响应。在响应中,请注意以下内容:
-
文本信息-TextDetectionResult
数组元素包含有关检测到的文本 (TextDetection) 和视频中检测到文本的时间 (Timestamp
) 的信息。
-
分页信息 – 此示例显示一页文本检测信息。您可以在 GetTextDetection
的 MaxResults
输入参数中指定要返回的文本元素数量。如果存在的结果的数量超过了 MaxResults
或结果数超过默认的最大值,则 GetTextDetection
会返回一个令牌 (NextToken
),用于获取下一页的结果。有关更多信息,请参阅 获取 Amazon Rekognition Video 分析结果。
-
视频信息 – 在 GetTextDetection 返回的每页信息中,响应都包含有关视频格式的信息 (VideoMetadata)。
{
"JobStatus": "SUCCEEDED",
"VideoMetadata": {
"Codec": "h264",
"DurationMillis": 174441,
"Format": "QuickTime / MOV",
"FrameRate": 29.970029830932617,
"FrameHeight": 480,
"FrameWidth": 854
},
"TextDetections": [
{
"Timestamp": 967,
"TextDetection": {
"DetectedText": "Twinkle Twinkle Little Star",
"Type": "LINE",
"Id": 0,
"Confidence": 99.91780090332031,
"Geometry": {
"BoundingBox": {
"Width": 0.8337579369544983,
"Height": 0.08365312218666077,
"Left": 0.08313830941915512,
"Top": 0.4663468301296234
},
"Polygon": [
{
"X": 0.08313830941915512,
"Y": 0.4663468301296234
},
{
"X": 0.9168962240219116,
"Y": 0.4674469828605652
},
{
"X": 0.916861355304718,
"Y": 0.5511001348495483
},
{
"X": 0.08310343325138092,
"Y": 0.5499999523162842
}
]
}
}
},
{
"Timestamp": 967,
"TextDetection": {
"DetectedText": "Twinkle",
"Type": "WORD",
"Id": 1,
"ParentId": 0,
"Confidence": 99.98338317871094,
"Geometry": {
"BoundingBox": {
"Width": 0.2423887550830841,
"Height": 0.0833333358168602,
"Left": 0.08313817530870438,
"Top": 0.46666666865348816
},
"Polygon": [
{
"X": 0.08313817530870438,
"Y": 0.46666666865348816
},
{
"X": 0.3255269229412079,
"Y": 0.46666666865348816
},
{
"X": 0.3255269229412079,
"Y": 0.550000011920929
},
{
"X": 0.08313817530870438,
"Y": 0.550000011920929
}
]
}
}
},
{
"Timestamp": 967,
"TextDetection": {
"DetectedText": "Twinkle",
"Type": "WORD",
"Id": 2,
"ParentId": 0,
"Confidence": 99.982666015625,
"Geometry": {
"BoundingBox": {
"Width": 0.2423887550830841,
"Height": 0.08124999701976776,
"Left": 0.3454332649707794,
"Top": 0.46875
},
"Polygon": [
{
"X": 0.3454332649707794,
"Y": 0.46875
},
{
"X": 0.5878220200538635,
"Y": 0.46875
},
{
"X": 0.5878220200538635,
"Y": 0.550000011920929
},
{
"X": 0.3454332649707794,
"Y": 0.550000011920929
}
]
}
}
},
{
"Timestamp": 967,
"TextDetection": {
"DetectedText": "Little",
"Type": "WORD",
"Id": 3,
"ParentId": 0,
"Confidence": 99.8787612915039,
"Geometry": {
"BoundingBox": {
"Width": 0.16627635061740875,
"Height": 0.08124999701976776,
"Left": 0.6053864359855652,
"Top": 0.46875
},
"Polygon": [
{
"X": 0.6053864359855652,
"Y": 0.46875
},
{
"X": 0.7716627717018127,
"Y": 0.46875
},
{
"X": 0.7716627717018127,
"Y": 0.550000011920929
},
{
"X": 0.6053864359855652,
"Y": 0.550000011920929
}
]
}
}
},
{
"Timestamp": 967,
"TextDetection": {
"DetectedText": "Star",
"Type": "WORD",
"Id": 4,
"ParentId": 0,
"Confidence": 99.82640075683594,
"Geometry": {
"BoundingBox": {
"Width": 0.12997658550739288,
"Height": 0.08124999701976776,
"Left": 0.7868852615356445,
"Top": 0.46875
},
"Polygon": [
{
"X": 0.7868852615356445,
"Y": 0.46875
},
{
"X": 0.9168618321418762,
"Y": 0.46875
},
{
"X": 0.9168618321418762,
"Y": 0.550000011920929
},
{
"X": 0.7868852615356445,
"Y": 0.550000011920929
}
]
}
}
}
],
"NextToken": "NiHpGbZFnkM/S8kLcukMni15wb05iKtquu/Mwc+Qg1LVlMjjKNOD0Z0GusSPg7TONLe+OZ3P",
"TextModelVersion": "3.0"
}