Optical Character Recognition (OCR) brings powerful capabilities to mobile applications, enabling text extraction from images and documents. In this guide, we demonstrate how to integrate OCR into an iOS app using the SwiftyTesseract library (a Swift wrapper around the Tesseract engine) combined with Core Image preprocessing for better accuracy.

Requirements

  • iOS 13.0 or later
  • Xcode 14.0+
  • Swift 5.5+

Setup

Add SwiftyTesseract to your project using Swift Package Manager:

// Package.swift
dependencies: [
    .package(url: "https://github.com/SwiftyTesseract/SwiftyTesseract.git",
             .upToNextMajor(from: "4.0.1"))
]
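
SwiftyTesseract does not ship any trained language data. With the default data source (Bundle.main), it expects a tessdata directory in the app bundle containing a .traineddata file for each language you initialize, for example eng.traineddata for .english. Add the folder to Xcode as a folder reference rather than a group so the directory structure is preserved:

YourApp/
  tessdata/
    eng.traineddata   (downloaded from the upstream tessdata repository)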

Core OCR implementation

Create an OCR processor that wraps SwiftyTesseract for text extraction and adds a Core Image preprocessing pass:

import SwiftyTesseract
import CoreImage
import UIKit

class OCRProcessor {
    private let tesseract: Tesseract
    // CIContext is expensive to create, so keep a single instance for all preprocessing.
    private let context = CIContext()

    init() throws {
        tesseract = try Tesseract(language: .english)
    }

    func extractText(from image: UIImage) async throws -> String {
        // performOCR(on:) returns a Result in SwiftyTesseract 4.x; .get() converts
        // a failure into a thrown error.
        return try tesseract.performOCR(on: image).get()
    }

    func preprocessImage(_ image: UIImage) throws -> UIImage {
        guard let cgImage = image.cgImage else {
            throw OCRError.processingFailed
        }
        let ciImage = CIImage(cgImage: cgImage)
        // Boost contrast slightly and remove color; Tesseract generally performs
        // better on high-contrast, desaturated input.
        guard let filter = CIFilter(name: "CIColorControls") else {
            throw OCRError.processingFailed
        }
        filter.setValue(ciImage, forKey: kCIInputImageKey)
        filter.setValue(1.1, forKey: kCIInputContrastKey)
        filter.setValue(0.0, forKey: kCIInputBrightnessKey)
        filter.setValue(0.0, forKey: kCIInputSaturationKey)
        guard let outputImage = filter.outputImage,
              let processedCgImage = context.createCGImage(outputImage, from: outputImage.extent)
        else {
            throw OCRError.processingFailed
        }
        // Preserve the original scale and orientation so downstream consumers see
        // the same geometry as the input image.
        return UIImage(cgImage: processedCgImage,
                       scale: image.scale,
                       orientation: image.imageOrientation)
    }

    enum OCRError: Error {
        case processingFailed
    }
}
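
As a quick sanity check, the processor can be exercised from any async throwing context; document_scan here is a placeholder for any image in your asset catalog:

let processor = try OCRProcessor()
if let scan = UIImage(named: "document_scan") {
    let preprocessed = try processor.preprocessImage(scan)
    let text = try await processor.extractText(from: preprocessed)
    print(text)
}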

Camera integration

Integrate camera capture to process live images for OCR. Add an NSCameraUsageDescription entry to your Info.plist first; without it, the app will crash the first time it requests camera access. This example shows how to set up the camera and process video frames:

import AVFoundation
import UIKit

class CameraViewController: UIViewController {
    private let captureSession = AVCaptureSession()
    private let processor: OCRProcessor

    init() throws {
        self.processor = try OCRProcessor()
        super.init(nibName: nil, bundle: nil)
    }

    required init?(coder: NSCoder) {
        fatalError("init(coder:) has not been implemented")
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        Task {
            await setupCamera()
        }
    }

    private func setupCamera() async {
        // Ask for camera permission before configuring the session.
        guard await AVCaptureDevice.requestAccess(for: .video) else { return }

        guard let device = AVCaptureDevice.default(.builtInWideAngleCamera,
                                                    for: .video,
                                                    position: .back) else { return }
        do {
            let input = try AVCaptureDeviceInput(device: device)
            guard captureSession.canAddInput(input) else { return }
            captureSession.addInput(input)

            let output = AVCaptureVideoDataOutput()
            // Drop frames that arrive while earlier frames are still being processed.
            output.alwaysDiscardsLateVideoFrames = true
            output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "com.app.camera"))
            guard captureSession.canAddOutput(output) else { return }
            captureSession.addOutput(output)

            // startRunning() blocks until the session is live, so keep it off the main thread.
            DispatchQueue.global(qos: .userInitiated).async { [captureSession] in
                captureSession.startRunning()
            }
        } catch {
            print("Camera setup failed: \(error.localizedDescription)")
        }
    }
}

extension CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)
        // Note: creating a CIContext per frame is expensive; in production, reuse a
        // single context and limit how often frames reach OCR (see the sketch below).
        let context = CIContext()
        guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else { return }
        let image = UIImage(cgImage: cgImage)
        Task {
            do {
                let processedImage = try processor.preprocessImage(image)
                let text = try await processor.extractText(from: processedImage)
                print("Detected text: \(text)")
            } catch {
                print("OCR processing failed: \(error)")
            }
        }
    }
}
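
Running Tesseract on every frame will saturate the CPU and queue up work faster than it can finish. One simple option is to rate-limit recognition; the helper below is an illustrative sketch (FrameThrottler is not part of SwiftyTesseract) that you could store as a property on CameraViewController and consult at the top of captureOutput:

import Foundation

// Illustrative rate limiter: accepts at most one frame per interval.
final class FrameThrottler {
    private var lastAccepted = Date.distantPast
    private let minimumInterval: TimeInterval

    init(minimumInterval: TimeInterval = 1.0) {
        self.minimumInterval = minimumInterval
    }

    // Returns true when enough time has passed since the last accepted frame.
    func shouldProcess(now: Date = Date()) -> Bool {
        guard now.timeIntervalSince(lastAccepted) >= minimumInterval else { return false }
        lastAccepted = now
        return true
    }
}

In captureOutput, an early guard such as guard throttler.shouldProcess() else { return } keeps recognition to roughly one pass per second.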

Results display

Display OCR results with highlighted links using a custom text view:

import UIKit

class ResultsViewController: UIViewController {
    private let textView: UITextView = {
        let tv = UITextView()
        tv.isEditable = false
        tv.font = .monospacedSystemFont(ofSize: 16, weight: .regular)
        return tv
    }()

    override func viewDidLoad() {
        super.viewDidLoad()
        view.backgroundColor = .systemBackground
        // Add and pin the text view so the results fill the screen.
        textView.translatesAutoresizingMaskIntoConstraints = false
        view.addSubview(textView)
        NSLayoutConstraint.activate([
            textView.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor),
            textView.leadingAnchor.constraint(equalTo: view.safeAreaLayoutGuide.leadingAnchor),
            textView.trailingAnchor.constraint(equalTo: view.safeAreaLayoutGuide.trailingAnchor),
            textView.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor)
        ])
    }

    func updateResults(with text: String) {
        let attributedString = NSMutableAttributedString(string: text)
        // Tint anything the data detector recognizes as a link.
        if let detector = try? NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue) {
            detector.enumerateMatches(in: text, options: [], range: NSRange(location: 0, length: text.utf16.count)) { match, _, _ in
                if let match = match {
                    attributedString.addAttribute(.foregroundColor, value: UIColor.systemBlue, range: match.range)
                }
            }
        }
        textView.attributedText = attributedString
    }
}
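
Once recognition completes, hand the text to the results screen; recognizedText stands in for the string returned by extractText(from:):

let resultsVC = ResultsViewController()
resultsVC.updateResults(with: recognizedText)
navigationController?.pushViewController(resultsVC, animated: true)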

Performance optimization

Improve OCR performance by downscaling oversized captures before recognition; very large images typically slow Tesseract down without improving accuracy:

import UIKit

extension OCRProcessor {
    func optimizeImage(_ image: UIImage) -> UIImage {
        // Cap the longest side while preserving the aspect ratio; stretching the
        // image to a fixed size distorts glyphs and hurts recognition.
        let maxDimension: CGFloat = 1920
        let longestSide = max(image.size.width, image.size.height)
        guard longestSide > maxDimension else { return image }

        let scale = maxDimension / longestSide
        let targetSize = CGSize(width: image.size.width * scale,
                                height: image.size.height * scale)
        let renderer = UIGraphicsImageRenderer(size: targetSize)
        return renderer.image { _ in
            image.draw(in: CGRect(origin: .zero, size: targetSize))
        }
    }

    func processWithOptimization(_ image: UIImage) async throws -> String {
        let optimizedImage = optimizeImage(image)
        let preprocessedImage = try preprocessImage(optimizedImage)
        return try await extractText(from: preprocessedImage)
    }
}
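
With this extension in place, the camera delegate's Task can call the combined pipeline instead of the separate preprocess and extract steps:

Task {
    do {
        let text = try await processor.processWithOptimization(image)
        print("Detected text: \(text)")
    } catch {
        print("OCR processing failed: \(error)")
    }
}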

Testing

Use XCTest to verify that recognition and preprocessing behave as expected. These tests assume an image named test_document has been added to the test bundle:

import XCTest
import UIKit
@testable import YourApp // replace YourApp with your app's module name

class OCRTests: XCTestCase {
    var processor: OCRProcessor!

    // XCTest calls setUpWithError() before each test; setUp() itself cannot throw.
    override func setUpWithError() throws {
        try super.setUpWithError()
        processor = try OCRProcessor()
    }

    func testImageRecognition() async throws {
        guard let image = UIImage(named: "test_document",
                                  in: Bundle(for: OCRTests.self),
                                  compatibleWith: nil) else {
            XCTFail("Test image not found")
            return
        }
        let result = try await processor.extractText(from: image)
        XCTAssertFalse(result.isEmpty)
    }

    func testPreprocessing() throws {
        guard let image = UIImage(named: "test_document",
                                  in: Bundle(for: OCRTests.self),
                                  compatibleWith: nil) else {
            XCTFail("Test image not found")
            return
        }
        let processedImage = try processor.preprocessImage(image)
        // preprocessImage returns a non-optional UIImage, so assert on its contents instead.
        XCTAssertGreaterThan(processedImage.size.width, 0)
        XCTAssertGreaterThan(processedImage.size.height, 0)
    }
}
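
If you use the aspect-preserving optimizeImage above, a small extra test in OCRTests can guard against regressions; it reuses the same test_document asset:

func testOptimizeImagePreservesAspectRatio() throws {
    guard let image = UIImage(named: "test_document",
                              in: Bundle(for: OCRTests.self),
                              compatibleWith: nil) else {
        XCTFail("Test image not found")
        return
    }
    let optimized = processor.optimizeImage(image)
    XCTAssertEqual(image.size.width / image.size.height,
                   optimized.size.width / optimized.size.height,
                   accuracy: 0.01)
}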

Troubleshooting

  • Ensure that the image is clear and well-lit for accurate OCR processing.
  • Verify that your app has camera permission and that NSCameraUsageDescription is present in Info.plist.
  • Adjust preprocessing settings—such as contrast and brightness—to improve recognition quality.
  • Confirm that the SwiftyTesseract language data is correctly bundled (a quick runtime check is shown after this list).
  • Refer to the SwiftyTesseract documentation for advanced troubleshooting tips.
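
For the language-data bullet above, a quick runtime check confirms the trained data actually shipped in the app bundle (this assumes the default tessdata folder layout and English data):

// Returns a URL only if the tessdata folder reference made it into the bundle.
let trainedDataURL = Bundle.main.url(forResource: "eng",
                                     withExtension: "traineddata",
                                     subdirectory: "tessdata")
print(trainedDataURL != nil ? "tessdata found" : "tessdata missing from the app bundle")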

For applications requiring cloud-based OCR processing or handling large document volumes, consider integrating with Uppy's file upload capabilities to streamline document processing pipelines.

In summary, integrating OCR on iOS with SwiftyTesseract lets you transform images into searchable text efficiently. Combine preprocessing, frame throttling, and image downscaling to keep recognition fast and the user experience responsive.