I'm currently working on a project where I need to make PDF documents accessible to individuals with disabilities. I'm using PDFBox 3.0 to create these PDFs, but I'm facing challenges in properly tagging the PDFs to meet accessibility compliance, having done everything else to make them compliant (XMP metadata, marked dictionary etc...).
I followed these two guides already posted on stack overflow to no results. I expected that these tags will be reflected in the document however when testing this in Acrobat, only the top level tag under the logical structure is reflected
Here is the code at present, note I have stripped out the some layout code to keep this concise.
object GeneratorImpl {
protected class PDFContext {
val document: PDDocument = new PDDocument()
var page: PDPage = new PDPage(PDRectangle.A4)
page.getCOSObject.setItem(COSName.STRUCT_PARENTS, COSInteger.get(0))
var contentStream: PDPageContentStream = new PDPageContentStream(document, page)
var currentY: Float = 0
val MARGIN = 50f
var unsupportedCharsFound = false
page.setMediaBox(PDRectangle.A4)
document.addPage(page)
//Set Document Language
document.getDocumentCatalog.setLanguage("en")
document.getDocument.setVersion(1.7F)
//Set Document Viewer Preferences and add metadata
document.getDocumentCatalog.setViewerPreferences(new PDViewerPreferences(new COSDictionary()));
document.getDocumentCatalog.getViewerPreferences.setDisplayDocTitle(true);
//Mark the PDF as Tagged
private val markInfo = new PDMarkInfo()
markInfo.setMarked(true)
document.getDocumentCatalog.setMarkInfo(markInfo)
addXMPMetadata(document)
val structureTreeRoot = new PDStructureTreeRoot()
document.getDocumentCatalog.setStructureTreeRoot(structureTreeRoot)
val root = new PDStructureElement(StandardStructureTypes.DOCUMENT, structureTreeRoot)
val dictionary = new COSDictionary()
dictionary.setItem(COSName.STRUCT_PARENTS, COSInteger.get(0))
var pageStructureElement = new PDStructureElement(dictionary)
root.appendKid(pageStructureElement)
var currentMCID = 0
var currentMarkedDictionary = new COSDictionary()
currentMarkedDictionary.setInt(COSName.MCID, currentMCID)
var nums = new COSArray()
var numDictionaries = new COSArray()
}
def generateCertificatePDF(fields: Seq[Field]): Unit = {
var pdfContext = createContext()
if (fields.isEmpty) throw NonRetryableException("No fields supplied to PDF generator")
for (field <- fields) {
field match {
case header: Header => {
pdfContext = addTitle(header.contents, pdfContext.ROBOTO_BOLD, 20)(pdfContext)
}
// Stripped out the other cases for simplicity
case _ => throw NonRetryableException("Unknown field type in list. Cannot generate PDF without matching generation logic.")
}
}
pdfContext.contentStream.close()
addParentTree(pdfContext)
pdfContext.document.save(new File("./test.pdf"))
val outputStream = new ByteArrayOutputStream()
pdfContext.document.save(outputStream)
outputStream.close()
pdfContext.document.close()
}
private def addTitle(text: String, font: PDType0Font, fontSize: Float)(implicit context: PDFContext): PDFContext = {
if (text.nonEmpty) {
setDocumentTitle(text)
addContentToParent(COSName.P, StandardStructureTypes.H, context.page, context.pageStructureElement)
addCenteredTextBlock(text.toUpperCase, fontSize, font)
}
context
}
private def addCenteredTextBlock(text: String, fontSize: Float, font: PDType0Font)(implicit context: PDFContext): PDFContext = {
var localContext = context
localContext.contentStream.setFont(font, fontSize)
// 2 * context.MARGIN for left and right margin * 2 for extra padding
val maxCentreWidth = localContext.page.getMediaBox.getWidth - (2 * localContext.MARGIN * 2)
localContext.currentY = localContext.page.getMediaBox.getHeight - (2 * localContext.MARGIN)
val startX = (localContext.page.getMediaBox.getWidth - maxCentreWidth) / 2f
val startY = localContext.currentY
var currentY = startY
val lines = wrapText(text, font, fontSize, maxCentreWidth)
for (line <- lines) {
if (localContext.currentY - fontSize <= localContext.MARGIN) {
localContext = addPage(fontSize, font)
}
val textWidth = fontSize * font.getStringWidth(line) / 1000F
val currentX = startX + (maxCentreWidth - textWidth) / 2f
localContext.contentStream.beginText()
localContext.contentStream.newLineAtOffset(currentX, currentY)
localContext.contentStream.beginMarkedContent(COSName.P, PDPropertyList.create(context.currentMarkedDictionary))
localContext.contentStream.showText(line)
localContext.contentStream.endMarkedContent()
setNextMarkedDictionary
if (line.contains("\n") || line.contains("\\n") || line.equals("")) {
currentY -= fontSize
localContext.contentStream.beginMarkedContent(COSName.ARTIFACT, PDPropertyList.create(context.currentMarkedDictionary))
localContext.contentStream.newLineAtOffset(currentX, currentY)
localContext.contentStream.endMarkedContent()
setNextMarkedDictionary
}
localContext.contentStream.endText()
currentY -= fontSize
localContext.currentY = currentY
}
localContext
}
private def setNextMarkedDictionary(implicit context: PDFContext): Unit = {
context.currentMCID += 1
context.currentMarkedDictionary = new COSDictionary()
context.currentMarkedDictionary.setInt(COSName.MCID, context.currentMCID)
}
//Add a structure element to a parent structure element with optional marked content given a non-null name param.//Add a structure element to a parent structure element with optional marked content given a non-null name param.
private def addContentToParent(name: COSName, structureType: String, currentPage: PDPage, parent: PDStructureElement)(implicit context: PDFContext) = {
//Create a structure element and add it to the current section.
var structureElement: PDStructureElement = null
if (structureType != null) {
structureElement = new PDStructureElement(structureType, parent)
structureElement.setPage(currentPage)
}
//If COSName is not null then there is marked content.
if (name != null) {
//numDict for parent tree
val numDict = new COSDictionary()
numDict.setInt(COSName.K, context.currentMCID)
numDict.setItem(COSName.PG, currentPage.getCOSObject)
if (structureElement != null) {
if (!COSName.ARTIFACT.equals(name)) structureElement.appendKid(new PDMarkedContent(name, context.currentMarkedDictionary))
else structureElement.appendKid(new PDArtifactMarkedContent(context.currentMarkedDictionary))
numDict.setItem(COSName.P, structureElement.getCOSObject)
}
else {
if (!COSName.ARTIFACT.equals(name)) parent.appendKid(new PDMarkedContent(name, context.currentMarkedDictionary))
else parent.appendKid(new PDArtifactMarkedContent(context.currentMarkedDictionary))
numDict.setItem(COSName.P, parent.getCOSObject)
}
numDict.setName(COSName.S, name.getName)
context.numDictionaries.add(numDict)
}
if (structureElement != null) parent.appendKid(structureElement)
structureElement
}
//Adds the parent tree to root struct element to identify tagged content//Adds the parent tree to root struct element to identify tagged content
private def addParentTree(implicit context: PDFContext): Unit = {
val dict = new COSDictionary()
context.nums.add(context.numDictionaries)
var i = 1
while (i < 1) {
context.nums.add(COSInteger.get(i))
i += 1
}
dict.setItem(COSName.NUMS, context.nums)
val numberTreeNode = new PDNumberTreeNode(dict, dict.getClass)
context.document.getDocumentCatalog.getStructureTreeRoot.setParentTree(numberTreeNode)
context.document.getDocumentCatalog.getStructureTreeRoot.appendKid(context.root)
}
}