在Objective-C中从PDF中提取可编辑字段

我研究在 iOS 应用程序中使用 PDF 已经有一段时间了。我已经解决了一些难题,比如扫描操作符以及在 UIWebView 中显示 PDF。但是,我真正需要做的是识别 PDF 文档中的可编辑字段。

理想情况下,我希望能够直接与这些字段交互,但这听起来非常困难,也不适合作为第一步。我已经在与一个可以用这种方式处理 PDF 的 Windows 服务交互;只要能识别出可编辑字段,我就可以在表单视图中向用户收集字段数据,并将这些数据发回服务器。问题是我不知道如何识别这些字段。我处理的是政府发布的 PDF(例如 I-9 和 W-4),所以我无法控制 PDF 的创建或字段的命名。这就是为什么我需要动态提取它们。任何帮助和/或参考资料都将不胜感激。

我按照苹果《Quartz 2D 编程指南》中的“PDF 文档解析”一章(https://developer.apple.com/library/mac/#documentation/graphicsimaging/conceptual/drawingwithquartz2d/dq_pdf_scan/dq_pdf_scan.html)在扫描 PDF 时触发操作符回调,但这并不能帮我找到可编辑字段。

我也只是简单地把 PDF 数据加载到 UIWebView 中显示给用户。

// Load the bytes in decodedData into _webView, declaring them as application/pdf; no base URL is supplied.
[_webView loadData:decodedData MIMEType:@"application/pdf" textEncodingName:@"utf-8" baseURL:nil]; 

更新:

我编写了一个 PDF 辅助类(如下所示)来遍历文档目录中所有可能的对象类型。最初我没有处理数组中嵌套的字典,所以看不到表单字段。解决这个问题之后,我又意识到存在指向父对象的引用,必须加以处理,否则循环的递归调用会导致无限循环。下面的代码展示了文档目录中丰富的信息。现在我只需要解析它,把我需要的表单字段分离出来。

PDFHelper.h

 #import <Foundation/Foundation.h>
// The CGPDF* types used in the method signatures below live in CoreGraphics;
// importing it here keeps this header self-contained.
#import <CoreGraphics/CoreGraphics.h>

// File-global pointer to a PDFHelper instance. It is assigned in
// -[PDFHelper init] and read by the C traversal callback in PDFHelper.m,
// which cannot receive `self` directly.
// NOTE(review): this is a definition in a header; importing the header from
// more than one translation unit may produce duplicate-symbol link errors.
// It is left untouched here so existing code keeps building.
id selfClass;

/// Walks the object graph of a PDF document (catalog, AcroForm, nested
/// dictionaries, arrays and streams), logs what it finds, and records the
/// values it encounters in `pdfDict`.
@interface PDFHelper : NSObject

/// Raw bytes of the PDF document to inspect.
@property (nonatomic, strong) NSData *pdfData;

/// Values collected during traversal, keyed by the PDF dictionary key under
/// which each value was found. Keys from nested dictionaries share this one
/// flat namespace, so later values replace earlier ones with the same key.
@property (nonatomic, strong) NSMutableDictionary *pdfDict;

/// Current nesting depth of the traversal; used to indent log output.
@property (nonatomic) int catalogLevel;

/// Converts a CGPDFArray into an NSArray. Boolean, integer, real and string
/// elements and nested arrays are added to the result; nested dictionaries
/// and stream dictionaries are traversed into `pdfDict` instead.
/// @param arr  The PDF array to convert.
/// @param dict The dictionary that contains `arr`.
/// @param key  The key under which `arr` is stored in `dict`.
- (NSArray *)copyPDFArray:(CGPDFArrayRef)arr
     referencingDictionary:(CGPDFDictionaryRef)dict
            referencingKey:(const char *)key;

/// Traverses the catalog's "AcroForm" dictionary into `pdfDict` and returns
/// whatever was recorded under the "XFA" key (nil when nothing was).
- (NSArray *)getFormFields;

/// Traverses the whole document catalog into `pdfDict` and returns the
/// catalog dictionary.
- (CGPDFDictionaryRef)getDocumentCatalog;

@end

PDFHelper.m

 #import <CoreGraphics/CoreGraphics.h>
#import "PDFHelper.h"
#import "FileHelpers.h"
#import "Log.h"

// Declared up front so the methods below can pass it to
// CGPDFDictionaryApplyFunction before its definition appears.
void getDictionaryObjects(const char *key, CGPDFObjectRef object, void *info);

// Returns YES for the keys that point back up the object tree ("Parent" and
// "P"). Following them would re-enter a dictionary that is already being
// traversed.
static BOOL PDFHelperIsBackReferenceKey(const char *key) {
    return key != NULL && (strcmp(key, "Parent") == 0 || strcmp(key, "P") == 0);
}

@interface PDFHelper ()
- (CGPDFDictionaryRef)getPdfDocDictionary;
- (void)beginTraversalAtDictionary:(CGPDFDictionaryRef)root;
- (void)logIndented:(NSString *)message;
- (void)storeValue:(id)value forPDFKey:(const char *)key;
- (void)descendIntoDictionary:(CGPDFDictionaryRef)dictionary reachedViaKey:(const char *)key;
- (void)visitStream:(CGPDFStreamRef)stream
      reachedViaKey:(const char *)key
          logPrefix:(NSString *)prefix
         dumpToDisk:(BOOL)dumpToDisk;
- (void)recordObject:(CGPDFObjectRef)object
              forKey:(const char *)key
        inDictionary:(CGPDFDictionaryRef)containingDictionary;
@end

@implementation PDFHelper {
    // Document backing every CGPDF reference handed out by this helper.
    // Owned by the helper and released in -dealloc or when pdfData changes.
    CGPDFDocumentRef _pdfDocument;
    // The NSData instance _pdfDocument was built from (compared by pointer).
    NSData *_loadedData;
    // Addresses of dictionaries already entered during the current traversal.
    NSMutableSet *_visitedDictionaries;
}

@synthesize pdfData = _pdfData;
@synthesize pdfDict = _pdfDict;
@synthesize catalogLevel = _catalogLevel;

#pragma mark - Lifecycle

- (instancetype)init {
    self = [super init];
    if (self) {
        selfClass = self;
        _pdfDict = [[NSMutableDictionary alloc] init];
        _visitedDictionaries = [[NSMutableSet alloc] init];
        _catalogLevel = 1;
    }
    return self;
}

- (void)dealloc {
    // CGPDFDocumentRelease is a no-op for NULL.
    CGPDFDocumentRelease(_pdfDocument);
}

#pragma mark - Public API

/// Traverses the catalog's "AcroForm" dictionary into pdfDict and returns the
/// array recorded under "XFA", or nil when the document cannot be opened, has
/// no AcroForm, or no array was recorded under that key.
- (NSArray *)getFormFields {
    CGPDFDictionaryRef catalog = [self getPdfDocDictionary];
    CGPDFDictionaryRef acroForm = NULL;
    if (catalog != NULL && CGPDFDictionaryGetDictionary(catalog, "AcroForm", &acroForm)) {
        [self beginTraversalAtDictionary:acroForm];
        CGPDFDictionaryApplyFunction(acroForm, getDictionaryObjects, acroForm);
    }
    // pdfDict is publicly mutable, so make sure the value really is an array
    // before handing it back under an NSArray return type.
    id xfa = [self.pdfDict objectForKey:@"XFA"];
    return [xfa isKindOfClass:[NSArray class]] ? xfa : nil;
}

/// Traverses the whole document catalog into pdfDict and returns the catalog.
/// Returns NULL when the document cannot be opened. The returned dictionary
/// belongs to the helper's document and stays valid only until pdfData is
/// replaced or the helper is deallocated.
- (CGPDFDictionaryRef)getDocumentCatalog {
    CGPDFDictionaryRef docCatalog = [self getPdfDocDictionary];
    if (docCatalog != NULL) {
        [self beginTraversalAtDictionary:docCatalog];
        CGPDFDictionaryApplyFunction(docCatalog, getDictionaryObjects, docCatalog);
    }
    return docCatalog;
}

/// Converts a CGPDFArray into an NSArray of Foundation values.
/// Booleans, integers, reals, names, strings and nested arrays are appended
/// to the result. Nested dictionaries and stream dictionaries are traversed
/// into pdfDict instead, unless `key` is a back-reference key.
/// @param arr  The PDF array to convert; NULL yields an empty array.
/// @param dict The dictionary containing `arr`. Kept for source
///             compatibility; element values are now read from the array
///             itself, so it is no longer consulted.
/// @param key  The key under which `arr` is stored; used for the
///             back-reference check when descending.
- (NSArray *)copyPDFArray:(CGPDFArrayRef)arr
     referencingDictionary:(CGPDFDictionaryRef)dict
            referencingKey:(const char *)key {
    // The C callback reaches the helper through the global, so make sure it
    // points at the instance doing this traversal.
    selfClass = self;

    NSMutableArray *elements = [[NSMutableArray alloc] init];
    if (arr == NULL) {
        return elements;
    }

    size_t count = CGPDFArrayGetCount(arr);
    [self logIndented:[NSString stringWithFormat:@"pdf array count: %zu", count]];

    for (size_t index = 0; index < count; index++) {
        CGPDFObjectRef object = NULL;
        if (!CGPDFArrayGetObject(arr, index, &object) || object == NULL) {
            continue;
        }

        switch (CGPDFObjectGetType(object)) {
            case kCGPDFObjectTypeNull: {
                [self logIndented:[NSString stringWithFormat:@"pdf array null(%zu)", index]];
                break;
            }
            case kCGPDFObjectTypeBoolean: {
                CGPDFBoolean objectBool;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeBoolean, &objectBool)) {
                    NSNumber *boxed = [NSNumber numberWithBool:objectBool];
                    [self logIndented:[NSString stringWithFormat:@"pdf array boolean value(%zu): %@", index, boxed]];
                    [elements addObject:boxed];
                }
                break;
            }
            case kCGPDFObjectTypeInteger: {
                CGPDFInteger objectInteger;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeInteger, &objectInteger)) {
                    [self logIndented:[NSString stringWithFormat:@"pdf array integer value(%zu): %ld", index, (long)objectInteger]];
                    // Box at full width; numberWithInt: would narrow the value.
                    [elements addObject:@((long)objectInteger)];
                }
                break;
            }
            case kCGPDFObjectTypeReal: {
                CGPDFReal objectReal;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeReal, &objectReal)) {
                    // Keep the fractional part instead of truncating to an integer.
                    [self logIndented:[NSString stringWithFormat:@"pdf array real(%zu): %f", index, (double)objectReal]];
                    [elements addObject:@((double)objectReal)];
                }
                break;
            }
            case kCGPDFObjectTypeName: {
                // Read the name from the element itself. Looking it up in the
                // containing dictionary under `key` cannot work, because the
                // value stored there is this array, not a name.
                const char *name = NULL;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name) && name != NULL) {
                    NSString *nameString = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding];
                    if (nameString) {
                        [self logIndented:[NSString stringWithFormat:@"pdf array name value(%zu): %@", index, nameString]];
                        [elements addObject:nameString];
                    }
                }
                break;
            }
            case kCGPDFObjectTypeString: {
                CGPDFStringRef objectString;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeString, &objectString)) {
                    // The copy is owned by us; hand ownership to ARC so it is released.
                    NSString *text = CFBridgingRelease(CGPDFStringCopyTextString(objectString));
                    [self logIndented:[NSString stringWithFormat:@"pdf array string(%zu): %@", index, text]];
                    if (text) {
                        [elements addObject:text];
                    }
                }
                break;
            }
            case kCGPDFObjectTypeArray: {
                CGPDFArrayRef objectArray;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeArray, &objectArray)) {
                    [elements addObject:[self copyPDFArray:objectArray
                                     referencingDictionary:dict
                                            referencingKey:key]];
                }
                break;
            }
            case kCGPDFObjectTypeDictionary: {
                CGPDFDictionaryRef objectDict;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &objectDict)) {
                    [self descendIntoDictionary:objectDict reachedViaKey:key];
                }
                break;
            }
            case kCGPDFObjectTypeStream: {
                CGPDFStreamRef objectStream;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeStream, &objectStream)) {
                    NSString *prefix = [NSString stringWithFormat:@"pdf array stream length: (%zu):", index];
                    [self visitStream:objectStream reachedViaKey:key logPrefix:prefix dumpToDisk:NO];
                }
                break;
            }
            default:
                break;
        }
    }
    return elements;
}

#pragma mark - Document loading

/// Returns the catalog of the document built from pdfData, or NULL when there
/// is no data or the document cannot be written or opened. The document is
/// created once per pdfData instance and reused afterwards, instead of being
/// re-created and leaked on every call.
/// NOTE(review): data changes are detected by pointer comparison, so an
/// NSMutableData that is mutated in place is not picked up.
- (CGPDFDictionaryRef)getPdfDocDictionary {
    if (_pdfDocument != NULL && _loadedData == _pdfData) {
        return CGPDFDocumentGetCatalog(_pdfDocument);
    }

    // Different (or first) data: drop the old document and everything that
    // referred to it.
    CGPDFDocumentRelease(_pdfDocument);
    _pdfDocument = NULL;
    _loadedData = nil;
    [_visitedDictionaries removeAllObjects];

    NSData *data = _pdfData;
    NSString *path = [FileHelpers pathInLibraryDirectory:@"file.pdf"];
    if (data == nil || path == nil) {
        [Log LogDebug:@"PDFHelper: no PDF data or destination path available"];
        return NULL;
    }

    // Check the write result: opening a stale file left behind by an earlier
    // run would silently report another document's contents.
    if (![data writeToFile:path atomically:YES]) {
        [Log LogDebug:@"PDFHelper: could not write PDF data to disk"];
        return NULL;
    }

    NSURL *pdfURL = [[NSURL alloc] initFileURLWithPath:path];
    _pdfDocument = CGPDFDocumentCreateWithURL((__bridge CFURLRef)pdfURL);
    if (_pdfDocument == NULL) {
        [Log LogDebug:@"PDFHelper: could not open PDF document"];
        return NULL;
    }

    _loadedData = data;
    return CGPDFDocumentGetCatalog(_pdfDocument);
}

#pragma mark - Traversal helpers

// Binds the C callback to this instance and resets cycle tracking, marking
// the root itself as entered.
- (void)beginTraversalAtDictionary:(CGPDFDictionaryRef)root {
    selfClass = self;
    [_visitedDictionaries removeAllObjects];
    if (root != NULL) {
        [_visitedDictionaries addObject:[NSValue valueWithPointer:root]];
    }
}

// Logs `message` prefixed with one "-" per level of the current nesting depth.
- (void)logIndented:(NSString *)message {
    NSUInteger depth = (NSUInteger)MAX(self.catalogLevel, 0);
    NSString *indent = [@"" stringByPaddingToLength:depth withString:@"-" startingAtIndex:0];
    [Log LogDebug:[indent stringByAppendingString:message]];
}

// Records `value` in pdfDict under `key`. Does nothing when either is missing
// or the key is not valid UTF-8, because NSMutableDictionary raises on nil.
- (void)storeValue:(id)value forPDFKey:(const char *)key {
    if (value == nil || key == NULL) {
        return;
    }
    NSString *dictionaryKey = [NSString stringWithCString:key encoding:NSUTF8StringEncoding];
    if (dictionaryKey == nil) {
        return;
    }
    [self.pdfDict setObject:value forKey:dictionaryKey];
}

// Recurses into `dictionary` one nesting level deeper, unless it was reached
// through a back-reference key or has already been entered in this traversal.
// NOTE(review): the already-entered check compares dictionary addresses and
// assumes CGPDF returns the same pointer each time it resolves the same
// object — confirm. If it does not, the check is simply ineffective.
- (void)descendIntoDictionary:(CGPDFDictionaryRef)dictionary reachedViaKey:(const char *)key {
    if (dictionary == NULL || PDFHelperIsBackReferenceKey(key)) {
        return;
    }
    NSValue *identity = [NSValue valueWithPointer:dictionary];
    if ([_visitedDictionaries containsObject:identity]) {
        return;
    }
    [_visitedDictionaries addObject:identity];

    self.catalogLevel = self.catalogLevel + 1;
    CGPDFDictionaryApplyFunction(dictionary, getDictionaryObjects, dictionary);
    self.catalogLevel = self.catalogLevel - 1;
}

// Logs a stream's raw length and its contents decoded as UTF-8 (when they
// decode), optionally dumps the raw bytes to Documents/data.dat, then
// traverses the stream's dictionary.
- (void)visitStream:(CGPDFStreamRef)stream
      reachedViaKey:(const char *)key
          logPrefix:(NSString *)prefix
         dumpToDisk:(BOOL)dumpToDisk {
    CGPDFDataFormat format = CGPDFDataFormatRaw;
    // The copied data is owned by us; CFBridgingRelease hands it to ARC and
    // turns a NULL result into nil.
    NSData *data = CFBridgingRelease(CGPDFStreamCopyData(stream, &format));

    if (dumpToDisk && data != nil) {
        // Debug aid: each stream overwrites the previous dump.
        [data writeToFile:[FileHelpers pathInDocumentDirectory:@"data.dat"] atomically:YES];
    }

    NSString *dataString = nil;
    if (data != nil) {
        dataString = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
    }
    [self logIndented:[NSString stringWithFormat:@"%@ %ld - %@", prefix, (long)data.length, dataString]];

    [self descendIntoDictionary:CGPDFStreamGetDictionary(stream) reachedViaKey:key];
}

// Handles one key/value pair of a PDF dictionary: logs it, stores scalar
// values and arrays in pdfDict, and recurses into dictionaries and streams.
- (void)recordObject:(CGPDFObjectRef)object
              forKey:(const char *)key
        inDictionary:(CGPDFDictionaryRef)containingDictionary {
    [self logIndented:[NSString stringWithFormat:@"key: %s", key]];

    switch (CGPDFObjectGetType(object)) {
        case kCGPDFObjectTypeNull: {
            [Log LogDebug:@"*****pdf null value"];
            break;
        }
        case kCGPDFObjectTypeBoolean: {
            CGPDFBoolean objectBoolean;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeBoolean, &objectBoolean)) {
                NSNumber *boxed = [NSNumber numberWithBool:objectBoolean];
                [self logIndented:[NSString stringWithFormat:@"pdf boolean value: %@", boxed]];
                [self storeValue:boxed forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeInteger: {
            CGPDFInteger objectInteger;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeInteger, &objectInteger)) {
                [self logIndented:[NSString stringWithFormat:@"pdf integer value: %ld", (long)objectInteger]];
                // Box at full width; numberWithInt: would narrow the value.
                [self storeValue:@((long)objectInteger) forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeReal: {
            CGPDFReal objectReal;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeReal, &objectReal)) {
                // Keep the fractional part instead of truncating to an integer.
                [self logIndented:[NSString stringWithFormat:@"pdf real value: %f", (double)objectReal]];
                [self storeValue:@((double)objectReal) forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeName: {
            const char *name = NULL;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name) && name != NULL) {
                NSString *nameString = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding];
                if (nameString) {
                    [self logIndented:[NSString stringWithFormat:@"pdf name value: %@", nameString]];
                    [self storeValue:nameString forPDFKey:key];
                }
            }
            break;
        }
        case kCGPDFObjectTypeString: {
            CGPDFStringRef objectString;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeString, &objectString)) {
                // Copy once and transfer ownership to ARC; the copy was
                // previously made twice and never released.
                NSString *text = CFBridgingRelease(CGPDFStringCopyTextString(objectString));
                [self logIndented:[NSString stringWithFormat:@"pdf string value: %@", text]];
                [self storeValue:text forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeArray: {
            CGPDFArrayRef objectArray;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeArray, &objectArray)) {
                NSArray *converted = [self copyPDFArray:objectArray
                                  referencingDictionary:containingDictionary
                                         referencingKey:key];
                [self storeValue:converted forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeDictionary: {
            CGPDFDictionaryRef objectDictionary;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &objectDictionary)) {
                [self descendIntoDictionary:objectDictionary reachedViaKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeStream: {
            CGPDFStreamRef objectStream;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeStream, &objectStream)) {
                [self visitStream:objectStream
                    reachedViaKey:key
                        logPrefix:@"pdf stream length:"
                       dumpToDisk:YES];
            }
            break;
        }
        default:
            break;
    }
}

@end

// CGPDFDictionaryApplyFunction callback: invoked once per key/value pair.
// `info` is the dictionary being enumerated. The helper that records the
// values is reached through the file-global `selfClass`, which the public
// entry points set before starting a traversal.
// NOTE(review): because of that global, traversal is neither re-entrant nor
// safe to run from several threads at once.
void getDictionaryObjects(const char *key, CGPDFObjectRef object, void *info) {
    PDFHelper *helper = selfClass;
    if (helper == nil || key == NULL || object == NULL) {
        return;
    }
    [helper recordObject:object forKey:key inDictionary:(CGPDFDictionaryRef)info];
}

“可编辑字段”是指可以用 Acrobat 或 Adobe Reader 填写的那类表单元素吗?

这些字段并不是页面描述本身的一部分。如果查看 PDF 规范文档,你会在第 12.7 章找到“交互式表单”的说明,其中指出文档的字段字典从文档目录中名为“AcroForm”的条目开始存储。

据我所知,iOS 确实可以访问文档目录,因此你需要在该目录字典中找到“AcroForm”条目,然后沿着其中层层嵌套的字典结构向下遍历,收集所需的信息。整个文档的所有字段都以分层的方式存储在这里。

Interesting Posts