Building OCR capabilities in Swift for iOS

Optical Character Recognition (OCR) brings powerful capabilities to mobile applications, enabling text extraction from images and documents. In this guide, we show how to add OCR to an iOS app using the SwiftyTesseract library, combined with Core Image preprocessing to improve recognition accuracy.
Requirements
- iOS 13.0 or later
- Xcode 14.0+
- Swift 5.5+
Setup
Add SwiftyTesseract to your project using Swift Package Manager:
// Package.swift
dependencies: [
    .package(url: "https://github.com/SwiftyTesseract/SwiftyTesseract.git",
             .upToNextMajor(from: "4.0.1"))
]
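If your project consumes the package through a Package.swift manifest, the library product also needs to be listed in your target's dependencies. A sketch, assuming the product is named SwiftyTesseract and using a placeholder target name:

// Package.swift (target section; "MyApp" is a placeholder target name)
targets: [
    .target(
        name: "MyApp",
        dependencies: [
            .product(name: "SwiftyTesseract", package: "SwiftyTesseract")
        ]
    )
]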
Core OCR implementation
Create an OCR processor that wraps SwiftyTesseract for text extraction and uses Core Image for preprocessing:
import CoreImage
import SwiftyTesseract
import UIKit

class OCRProcessor {
    private let tesseract: Tesseract
    // Reuse a single CIContext; creating one per call is expensive.
    private let context = CIContext()

    init() throws {
        tesseract = try Tesseract(language: .english)
    }

    func extractText(from image: UIImage) async throws -> String {
        // SwiftyTesseract 4.x returns a Result<String, Tesseract.Error>;
        // .get() unwraps the recognized text or throws the underlying error.
        try tesseract.performOCR(on: image).get()
    }

    // Boost contrast and strip color: Tesseract performs best on
    // high-contrast, grayscale input.
    func preprocessImage(_ image: UIImage) throws -> UIImage {
        guard let cgImage = image.cgImage else {
            throw OCRError.processingFailed
        }
        let ciImage = CIImage(cgImage: cgImage)
        guard let filter = CIFilter(name: "CIColorControls") else {
            throw OCRError.processingFailed
        }
        filter.setValue(ciImage, forKey: kCIInputImageKey)
        filter.setValue(1.1, forKey: kCIInputContrastKey)   // slight contrast boost
        filter.setValue(0.0, forKey: kCIInputBrightnessKey) // leave brightness unchanged
        filter.setValue(0.0, forKey: kCIInputSaturationKey) // desaturate to grayscale
        guard let outputImage = filter.outputImage,
              let processedCgImage = context.createCGImage(outputImage, from: outputImage.extent)
        else {
            throw OCRError.processingFailed
        }
        return UIImage(cgImage: processedCgImage)
    }

    enum OCRError: Error {
        case processingFailed
    }
}
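A minimal call site might look like this (a sketch; "receipt" is a placeholder asset name):

// Hypothetical usage; "receipt" is a placeholder image asset.
Task {
    do {
        let processor = try OCRProcessor()
        guard let image = UIImage(named: "receipt") else { return }
        let cleaned = try processor.preprocessImage(image)
        let text = try await processor.extractText(from: cleaned)
        print(text)
    } catch {
        print("OCR failed: \(error)")
    }
}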
Camera integration
Integrate camera capture to run OCR on live video frames. First add an NSCameraUsageDescription entry to your app's Info.plist; iOS terminates apps that access the camera without one. This example requests permission, configures the session, and processes frames:
import AVFoundation
import UIKit

class CameraViewController: UIViewController {
    private let captureSession = AVCaptureSession()
    private let processor: OCRProcessor
    // Shared context for frame conversion; creating one per frame is wasteful.
    private let ciContext = CIContext()
    // Simple back-pressure flag: drop frames while a recognition pass runs.
    private var isProcessing = false

    init() throws {
        self.processor = try OCRProcessor()
        super.init(nibName: nil, bundle: nil)
    }

    required init?(coder: NSCoder) {
        fatalError("init(coder:) has not been implemented")
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        Task {
            await setupCamera()
        }
    }

    private func setupCamera() async {
        // Request permission before configuring the session.
        guard await AVCaptureDevice.requestAccess(for: .video) else { return }
        guard let device = AVCaptureDevice.default(.builtInWideAngleCamera,
                                                   for: .video,
                                                   position: .back) else { return }
        do {
            let input = try AVCaptureDeviceInput(device: device)
            guard captureSession.canAddInput(input) else { return }
            captureSession.addInput(input)
            let output = AVCaptureVideoDataOutput()
            output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "com.app.camera"))
            guard captureSession.canAddOutput(output) else { return }
            captureSession.addOutput(output)
            // startRunning() blocks, so keep it off the main thread.
            DispatchQueue.global(qos: .userInitiated).async { [weak self] in
                self?.captureSession.startRunning()
            }
        } catch {
            print("Camera setup failed: \(error.localizedDescription)")
        }
    }
}
extension CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        // Skip frames while OCR is still running; recognition is far slower
        // than the camera's frame rate.
        guard !isProcessing,
              let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)
        guard let cgImage = ciContext.createCGImage(ciImage, from: ciImage.extent) else { return }
        let image = UIImage(cgImage: cgImage)
        isProcessing = true
        Task {
            defer { self.isProcessing = false }
            do {
                let processedImage = try self.processor.preprocessImage(image)
                let text = try await self.processor.extractText(from: processedImage)
                print("Detected text: \(text)")
            } catch {
                print("OCR processing failed: \(error)")
            }
        }
    }
}
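The session above captures frames but shows nothing on screen. A preview layer fixes that; a minimal sketch of a method you could add to CameraViewController and call once the session is configured:

// Minimal preview sketch; assumes captureSession is already configured.
private func attachPreview() {
    let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
    previewLayer.frame = view.bounds
    previewLayer.videoGravity = .resizeAspectFill
    view.layer.addSublayer(previewLayer)
}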
Results display
Display OCR results with highlighted links using a custom text view:
import UIKit

class ResultsViewController: UIViewController {
    private let textView: UITextView = {
        let tv = UITextView()
        tv.isEditable = false
        tv.font = .monospacedSystemFont(ofSize: 16, weight: .regular)
        return tv
    }()

    override func viewDidLoad() {
        super.viewDidLoad()
        view.backgroundColor = .systemBackground
        textView.frame = view.bounds
        textView.autoresizingMask = [.flexibleWidth, .flexibleHeight]
        view.addSubview(textView)
    }

    func updateResults(with text: String) {
        let attributedString = NSMutableAttributedString(string: text)
        let fullRange = NSRange(location: 0, length: text.utf16.count)
        // Setting attributedText replaces the view's font, so reapply it.
        attributedString.addAttribute(.font,
                                      value: UIFont.monospacedSystemFont(ofSize: 16, weight: .regular),
                                      range: fullRange)
        if let detector = try? NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue) {
            detector.enumerateMatches(in: text, options: [], range: fullRange) { match, _, _ in
                if let match = match {
                    attributedString.addAttribute(.foregroundColor,
                                                  value: UIColor.systemBlue,
                                                  range: match.range)
                }
            }
        }
        textView.attributedText = attributedString
    }
}
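Wiring it into the camera flow could be as simple as the following sketch (the presentation style and recognizedText variable are illustrative):

// Illustrative wiring; recognizedText is a String from extractText(from:).
let resultsVC = ResultsViewController()
resultsVC.loadViewIfNeeded() // install the text view before updating it
resultsVC.updateResults(with: recognizedText)
present(resultsVC, animated: true)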
Performance optimization
Improve OCR performance by downscaling large images before recognition; very high-resolution input slows Tesseract considerably while adding little accuracy:
import UIKit

extension OCRProcessor {
    // Scale the longest edge down to maxDimension while preserving aspect
    // ratio; stretching to a fixed size would distort glyphs and hurt OCR.
    func optimizeImage(_ image: UIImage, maxDimension: CGFloat = 1920) -> UIImage {
        let largestSide = max(image.size.width, image.size.height)
        guard largestSide > maxDimension else { return image }
        let scale = maxDimension / largestSide
        let targetSize = CGSize(width: image.size.width * scale,
                                height: image.size.height * scale)
        let renderer = UIGraphicsImageRenderer(size: targetSize)
        return renderer.image { _ in
            image.draw(in: CGRect(origin: .zero, size: targetSize))
        }
    }

    func processWithOptimization(_ image: UIImage) async throws -> String {
        let optimizedImage = optimizeImage(image)
        let preprocessedImage = try preprocessImage(optimizedImage)
        return try await extractText(from: preprocessedImage)
    }
}
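To confirm the downscaling pays off on your devices, a rough timing harness helps (a sketch, not a rigorous benchmark):

import UIKit

// Rough timing sketch: compare elapsed time with and without optimization.
func measureOCR(_ image: UIImage, using processor: OCRProcessor) async throws {
    let start = CFAbsoluteTimeGetCurrent()
    let text = try await processor.processWithOptimization(image)
    let elapsed = CFAbsoluteTimeGetCurrent() - start
    print("Recognized \(text.count) characters in \(String(format: "%.2f", elapsed))s")
}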
Testing
Add XCTest cases to verify that OCR and preprocessing work as expected.
import XCTest
import UIKit

class OCRTests: XCTestCase {
    var processor: OCRProcessor!

    // XCTest's throwing setup hook is setUpWithError(), not setUp().
    override func setUpWithError() throws {
        try super.setUpWithError()
        processor = try OCRProcessor()
    }

    // Load fixtures from the test bundle, not the app bundle.
    private func testImage() -> UIImage? {
        UIImage(named: "test_document", in: Bundle(for: OCRTests.self), compatibleWith: nil)
    }

    func testImageRecognition() async throws {
        guard let image = testImage() else {
            XCTFail("Test image not found")
            return
        }
        let result = try await processor.extractText(from: image)
        XCTAssertFalse(result.isEmpty)
    }

    func testPreprocessing() throws {
        guard let image = testImage() else {
            XCTFail("Test image not found")
            return
        }
        let processedImage = try processor.preprocessImage(image)
        // The processed image should carry actual bitmap data.
        XCTAssertNotNil(processedImage.cgImage)
    }
}
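A fixture-free check for the downscaling behavior can also be added to OCRTests; this sketch synthesizes an oversized image so no test asset is needed:

func testOptimizeImagePreservesAspectRatio() {
    // Synthesize a 4000x2000 image so the test needs no fixture.
    let size = CGSize(width: 4000, height: 2000)
    let image = UIGraphicsImageRenderer(size: size).image { ctx in
        UIColor.white.setFill()
        ctx.fill(CGRect(origin: .zero, size: size))
    }
    let optimized = processor.optimizeImage(image)
    XCTAssertLessThanOrEqual(max(optimized.size.width, optimized.size.height), 1920)
    XCTAssertEqual(optimized.size.width / optimized.size.height,
                   size.width / size.height,
                   accuracy: 0.01)
}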
Troubleshooting
- Ensure that the image is clear and well-lit for accurate OCR processing.
- Verify that your app has the necessary camera permissions.
- Adjust preprocessing settings, such as contrast and brightness, to improve recognition quality (see the sketch after this list).
- Confirm that the SwiftyTesseract trained language data (tessdata) is bundled and configured correctly.
- Refer to the SwiftyTesseract documentation for advanced troubleshooting tips.
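For the preprocessing item above, a parameterized variant of preprocessImage makes experimentation easier; this is a sketch, and the values you pass are starting points, not recommendations:

import CoreImage
import UIKit

extension OCRProcessor {
    // Tunable preprocessing; try different contrast/brightness per document type.
    func preprocessImage(_ image: UIImage,
                         contrast: Float,
                         brightness: Float) throws -> UIImage {
        guard let cgImage = image.cgImage,
              let filter = CIFilter(name: "CIColorControls") else {
            throw OCRError.processingFailed
        }
        filter.setValue(CIImage(cgImage: cgImage), forKey: kCIInputImageKey)
        filter.setValue(contrast, forKey: kCIInputContrastKey)
        filter.setValue(brightness, forKey: kCIInputBrightnessKey)
        filter.setValue(0.0, forKey: kCIInputSaturationKey) // grayscale
        guard let output = filter.outputImage,
              let result = CIContext().createCGImage(output, from: output.extent) else {
            throw OCRError.processingFailed
        }
        return UIImage(cgImage: result)
    }
}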
For applications requiring cloud-based OCR processing or handling large document volumes, consider integrating Uppy's file upload capabilities to streamline document processing pipelines.
In summary, integrating OCR in iOS with SwiftyTesseract allows you to transform images into searchable text efficiently. Use these techniques to optimize your app's performance and enhance user experience.