return result import logging from functools import wraps logging.basicConfig(level=logging.INFO) logger = logging.getLogger( name )
def wait_and_download(self, file_path, output_path, poll_interval=2): """Submit and wait for completion.""" task_id = self.submit_ocr_task(file_path) while True: status = self.get_task_status(task_id) if status['state'] == 'completed': return self.download_result(task_id, output_path) elif status['state'] == 'failed': raise Exception(f"OCR failed: status.get('error', 'Unknown error')") time.sleep(poll_interval) client = FineReaderServerClient( base_url="http://localhost:8080", username="admin", password="secret" )
return output_pdf_path FineReader Server provides a REST API for distributed OCR. REST API Client import requests import base64 import json from pathlib import Path class FineReaderServerClient: def init (self, base_url, username, password): self.base_url = base_url.rstrip('/') self.session = requests.Session() self.session.auth = (username, password) abbyy finereader python
with ThreadPoolExecutor(max_workers=max_workers) as executor: list(tqdm(executor.map(process_one, image_files), total=len(image_files))) batch_ocr_cli("./scans", "./ocr_output", max_workers=2) 5. Method 2: COM Automation (Windows, Deep Control) This method gives you programmatic access to FineReader's object model. Initialize FineReader COM Object import win32com.client import pythoncom import os class FineReaderCOM: def init (self): pythoncom.CoInitialize() self.app = win32com.client.Dispatch("FineReader.Application") self.app.Visible = False # Run in background
doc.Recognize("English") doc.Export(output_pdf_path, "PDF", export_params) doc.Close() return result import logging from functools import wraps
def submit_ocr_task(self, file_path, output_format="pdf"): """Submit a file for OCR processing.""" with open(file_path, 'rb') as f: files = 'file': (Path(file_path).name, f) data = 'outputFormat': output_format, 'language': 'English', 'recognitionAccuracy': 'high', 'documentProcessingMode': 'auto' response = self.session.post( f"self.base_url/api/v1/tasks", files=files, data=data ) return response.json()['taskId']
image_files = list(input_folder.glob("*.png,jpg,jpeg,tiff,bmp")) Initialize FineReader COM Object import win32com
Args: input_path: Path to image or PDF output_path: Output file path (without extension) output_format: pdf, docx, xlsx, txt, html """ fine_cmd = r"C:\Program Files (x86)\ABBYY FineReader\FineReaderCmd.exe"
def __del__(self): self.app.Quit() pythoncom.CoUninitialize() fr = FineReaderCOM() text = fr.get_recognized_text("invoice.jpg") print(text[:500]) Zonal OCR example (extract specific invoice fields) zones = [(100, 200, 400, 230), # Invoice number (100, 300, 400, 330), # Date (500, 500, 800, 800)] # Total amount invoice_data = fr.zonal_ocr("invoice.jpg", zones) print(invoice_data) Advanced: PDF Searchable Creation def create_searchable_pdf(input_pdf_path, output_pdf_path): """Convert image-only PDF to searchable PDF/A.""" fr = FineReaderCOM() doc = fr.app.CreateDocument() # Load PDF pages doc.AddImageFile(input_pdf_path, 0)
def ocr_document(self, input_path, output_path, output_format="docx", language="English"): """OCR a single document with full control.""" # Create document object doc = self.app.CreateDocument() # Add image page page = doc.AddImageFile(input_path, 0) # 0 = auto orientation # Analyze layout doc.AnalyzeLayout() # Recognize with specific language doc.Recognize(language) # Export if output_format == "docx": doc.Export(output_path, "DOCX") elif output_format == "txt": doc.Export(output_path, "TEXT") elif output_format == "pdf": doc.Export(output_path, "PDF") # Cleanup doc.Close() return output_path