A simple PDF parser/serialiser for Go.
Parsing a PDF file into an abstract syntax tree (AST), either directly from a file name or from a file you have already opened:
fileName := "sample.pdf"

// Option 1: let the library open and parse the file by name.
// Always check the error: the returned tree is not meaningful when parsing fails.
ast, err := pdf.ParseFile(fileName)
if err != nil {
	fmt.Println(err)
	return
}

// Option 2: open the file yourself and parse from the open handle.
f, err := os.Open(fileName)
if err != nil {
	fmt.Println(err)
	return
}
defer f.Close()

// Plain assignment (not :=) because ast and err are already declared above.
ast, err = pdf.ParseStream(f)
if err != nil {
	fmt.Println(err)
	return
}
// ... use ast
Serialising an AST into a string, suitable for writing to a file.
ast, err := pdf.ParseFile("sample.pdf")
if err != nil {
	fmt.Println(err)
	return
}

// ... manipulate the AST

serialised, err := pdf.Serialise(ast)
if err != nil {
	fmt.Println(err)
	return
}

f, err := os.Create("serialised.pdf")
if err != nil {
	fmt.Println(err)
	return
}

// Check the write: a short or failed write would otherwise leave a
// truncated PDF on disk without any indication.
if _, err := f.Write([]byte(serialised)); err != nil {
	f.Close() // best effort; the write error is the one worth reporting
	fmt.Println(err)
	return
}

// Close explicitly instead of deferring so that errors surfaced at close
// time (e.g. delayed write failures) are not silently dropped.
if err := f.Close(); err != nil {
	fmt.Println(err)
	return
}
Finding a node by its ID, using a recursive search with a caller-supplied filter:
// Filter is a predicate over AST nodes: it reports whether the given node
// satisfies the caller's search criteria.
type Filter func(node ast.PdfNode) bool
// findNode searches the tree rooted at node, depth first, and returns the
// first node for which filter reports true. A node is tested before its
// children, and children are visited in the order Children() returns them.
// It returns nil when node is nil or when nothing in the tree matches.
func findNode(filter Filter, node ast.PdfNode) ast.PdfNode {
	// Treat a nil root (e.g. the result of a parse whose error was ignored)
	// as an empty tree rather than panicking on node.Children().
	if node == nil {
		return nil
	}
	if filter(node) {
		return node
	}
	for _, child := range node.Children() {
		if match := findNode(filter, child); match != nil {
			return match
		}
	}
	return nil
}
// Find an indirect object with ID 27.
node := findNode(func(n ast.PdfNode) bool {
if n.Type() == ast.INDIRECT_OBJECT {
if n.(*ast.IndirectObjectNode).Id() == 27 {
return true
}
}
return false
}, ast)
// ... do something with node