Thursday, August 4, 2022

Amazon Rekognition OCR Example Apps


Our project structure will look like this:

|-- config
|   |-- __init__.py
|   |-- aws_config.py
|-- images
|   |-- aircraft.png
|   |-- challenging.png
|   |-- park.png
|   |-- street_signs.png
|-- amazon_ocr.py

Edit aws_config.py and add your own AWS access key, secret key, and region.

# define our AWS Access Key, Secret Key, and Region
# (the strings below are placeholders -- replace each one with your own
# AWS settings; amazon_ocr.py imports this module as `config` and passes
# the three values to boto3.client)
# NOTE: avoid committing real credentials to version control

# passed to boto3.client as aws_access_key_id
ACCESS_KEY = "YOUR_ACCESS_KEY"

# passed to boto3.client as aws_secret_access_key
SECRET_KEY = "YOUR_SECRET_KEY"

# passed to boto3.client as region_name
REGION = "YOUR_AWS_REGION"

Then put the following code in amazon_ocr.py:

# import the necessary packages
from config import aws_config as config
import argparse
import boto3
import cv2

def draw_ocr_results(image, text, poly, color=(0, 255, 0)):
    """Draw one OCR detection (outline and text) onto an image.

    Args:
        image: OpenCV image array to draw on; it is modified in place.
        text: The detected text, rendered 10 pixels above the first vertex.
        poly: Four vertices as dicts with "X" and "Y" keys, ordered
            top-left, top-right, bottom-right, bottom-left. Coordinates
            are fractions of the image width/height and are scaled to
            pixel positions here.
        color: Color tuple handed to OpenCV for both the outline and text.

    Returns:
        The same ``image`` object that was passed in.

    Raises:
        IndexError: If ``poly`` has fewer than four vertices.
    """
    # scale the relative polygon coordinates to the input image size
    (h, w) = image.shape[:2]
    pts = [(int(poly[i]["X"] * w), int(poly[i]["Y"] * h)) for i in range(4)]

    # draw the bounding box of the detected text by joining each vertex
    # to the next one, wrapping around from the last back to the first
    for start, end in zip(pts, pts[1:] + pts[:1]):
        cv2.line(image, start, end, color, 2)

    # draw the text itself, slightly above the top-left vertex
    top_left = pts[0]
    cv2.putText(image, text, (top_left[0], top_left[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # return the output image
    return image


def main():
    """OCR an image with Amazon Rekognition and display the detections.

    Parses the command line, submits the raw image bytes to the
    Rekognition ``detect_text`` API, then shows every detection of the
    requested type one at a time, followed by a final image with all of
    them drawn together.

    Raises:
        SystemExit: If OpenCV cannot load the input image.
    """
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True,
                    help="path to input image that we'll submit to AWS Rekognition")
    ap.add_argument("-t", "--type", type=str, default="line",
                    choices=["line", "word"],
                    help="output text type (either 'line' or 'word')")
    args = vars(ap.parse_args())

    # connect to AWS so we can use the Amazon Rekognition OCR API
    client = boto3.client(
        "rekognition",
        aws_access_key_id=config.ACCESS_KEY,
        aws_secret_access_key=config.SECRET_KEY,
        region_name=config.REGION)

    # load the input image as a raw binary file (closing the handle as
    # soon as it is read) and make a request to the Amazon Rekognition
    # OCR API
    print("[INFO] making request to AWS Rekognition API...")
    with open(args["image"], "rb") as f:
        image_bytes = f.read()
    response = client.detect_text(Image={"Bytes": image_bytes})

    # grab the text detection results from the API and load the input
    # image again, this time in OpenCV format
    detections = response["TextDetections"]
    image = cv2.imread(args["image"])

    # cv2.imread signals failure by returning None instead of raising,
    # so stop here with a clear message rather than crashing on .copy()
    if image is None:
        raise SystemExit(
            "[ERROR] OpenCV could not read image: {}".format(args["image"]))

    # make a copy of the input image for final output
    final = image.copy()

    # loop over the text detection bounding boxes
    for detection in detections:
        # extract the OCR'd text, text type, and bounding box coordinates
        text = detection["DetectedText"]
        text_type = detection["Type"]
        poly = detection["Geometry"]["Polygon"]

        # only show the output of the OCR process if we are looking
        # at the correct text type
        if args["type"] != text_type.lower():
            continue

        # draw this detection alone on a fresh copy, and also add it to
        # the cumulative final image
        output = draw_ocr_results(image.copy(), text, poly)
        final = draw_ocr_results(final, text, poly)

        # show the output OCR'd line
        print(text)
        cv2.imshow("Output", output)
        cv2.waitKey(0)

    # show the final output image
    cv2.imshow("Final Output", final)
    cv2.waitKey(0)


if __name__ == "__main__":
    main()

To use it, we can type:

$ python amazon_ocr.py --image images/aircraft.png
[INFO] making request to AWS Rekognition API...
WARNING!
LOW FLYING AND DEPARTING AIRCRAFT
BLAST CAN CAUSE PHYSICAL INJURY

Happy coding!

0 comments:

Post a Comment