Skip to content

rgracey/pdf

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

37 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

pdf

A simple PDF parser/serialiser for Go.

Getting Started

Example

Parsing

Parsing a PDF file into an Abstract-Syntax-Tree (AST).

Parsing a file
// Parse a PDF directly from a path on disk.
fileName := "sample.pdf"

ast, err := pdf.ParseFile(fileName)

// Check the error before touching the tree instead of discarding it.
if err != nil {
    fmt.Println(err)
    return
}
Parsing a stream
// Open the file ourselves and hand the open handle to the parser.
f, err := os.Open(fileName)

if err != nil {
    fmt.Println(err)
    return
}

defer f.Close()

ast, err := pdf.ParseStream(f)

// Check the parse error as well rather than discarding it.
if err != nil {
    fmt.Println(err)
    return
}

Serialising

Serialising an AST into a string, suitable for writing to a file.

// Parse the input document; stop if it cannot be read or parsed.
ast, err := pdf.ParseFile("sample.pdf")

if err != nil {
    fmt.Println(err)
    return
}

// ... manipulate the AST

// Turn the (possibly modified) tree back into PDF text.
serialised, err := pdf.Serialise(ast)

if err != nil {
    fmt.Println(err)
    return
}

f, err := os.Create("serialised.pdf")

if err != nil {
    fmt.Println(err)
    return
}

defer f.Close()

// A failed or short write would otherwise leave a truncated PDF unnoticed.
if _, err := f.Write([]byte(serialised)); err != nil {
    fmt.Println(err)
    return
}

Finding a node

Finding a node by its ID

// Filter is a predicate over AST nodes: it receives a node and reports
// whether that node is the one being searched for.
type Filter func(n ast.PdfNode) bool

// findNode searches the tree rooted at node, depth-first, and returns the
// first node accepted by filter. The node itself is tested before any of its
// children, and children are visited in the order Children() yields them.
// It returns nil when no node in the tree satisfies filter.
func findNode(filter Filter, node ast.PdfNode) ast.PdfNode {
	matched := filter(node)
	if matched {
		return node
	}

	for _, kid := range node.Children() {
		hit := findNode(filter, kid)
		if hit == nil {
			// Nothing in this subtree; move on to the next sibling.
			continue
		}

		return hit
	}

	return nil
}

// Find an indirect object with ID 27.
//
// root is the parsed document tree (the value returned by pdf.ParseFile or
// pdf.ParseStream). It is deliberately not named ast: a local variable with
// that name would shadow the ast package referenced inside the filter.
node := findNode(func(n ast.PdfNode) bool {
    // Only indirect objects carry the ID we are matching on.
    if n.Type() != ast.INDIRECT_OBJECT {
        return false
    }

    // Comma-ok assertion: never panics, even if Type() and the concrete
    // type were ever to disagree.
    obj, ok := n.(*ast.IndirectObjectNode)

    return ok && obj.Id() == 27
}, root)


// ... do something with node

Releases

No releases published

Packages

No packages published

Languages