
Security News
PEP 810 Proposes Explicit Lazy Imports for Python 3.15
An opt-in lazy import keyword aims to speed up Python startups, especially CLIs, without the ecosystem-wide risks that sank PEP 690.
github.com/01101101/pdf
A simple Go library which enables reading PDF files.
This repository contains merged code from https://github.com/rsc/pdf and https://github.com/ledongthuc/pdf
Features
go get -u github.com/dslipak/pdf
package main
import (
"bytes"
"fmt"
"github.com/dslipak/pdf"
)
func main() {
pdf.DebugOn = true
content, err := readPdf("test.pdf") // Read local pdf file
if err != nil {
panic(err)
}
fmt.Println(content)
return
}
func readPdf(path string) (string, error) {
f, r, err := pdf.Open(path)
// remember close file
defer f.Close()
if err != nil {
return "", err
}
var buf bytes.Buffer
b, err := r.GetPlainText()
if err != nil {
return "", err
}
buf.ReadFrom(b)
return buf.String(), nil
}
func readPdf2(path string) (string, error) {
f, r, err := pdf.Open(path)
// remember close file
defer f.Close()
if err != nil {
return "", err
}
totalPage := r.NumPage()
for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
p := r.Page(pageIndex)
if p.V.IsNull() {
continue
}
var lastTextStyle pdf.Text
texts := p.Content().Text
for _, text := range texts {
if isSameSentence(text, lastTextStyle) {
lastTextStyle.S = lastTextStyle.S + text.S
} else {
fmt.Printf("Font: %s, Font-size: %f, x: %f, y: %f, content: %s \n", lastTextStyle.Font, lastTextStyle.FontSize, lastTextStyle.X, lastTextStyle.Y, lastTextStyle.S)
lastTextStyle = text
}
}
}
return "", nil
}
package main
import (
"fmt"
"os"
"github.com/dslipak/pdf"
)
func main() {
content, err := readPdf(os.Args[1]) // Read local pdf file
if err != nil {
panic(err)
}
fmt.Println(content)
return
}
func readPdf(path string) (string, error) {
f, r, err := pdf.Open(path)
defer func() {
_ = f.Close()
}()
if err != nil {
return "", err
}
totalPage := r.NumPage()
for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
p := r.Page(pageIndex)
if p.V.IsNull() {
continue
}
rows, _ := p.GetTextByRow()
for _, row := range rows {
println(">>>> row: ", row.Position)
for _, word := range row.Content {
fmt.Println(word.S)
}
}
}
return "", nil
}
FAQs
Unknown package
Did you know?
Socket for GitHub automatically highlights issues in each pull request and monitors the health of all your open source dependencies. Discover the contents of your packages and block harmful activity before you install or update your dependencies.
Security News
An opt-in lazy import keyword aims to speed up Python startups, especially CLIs, without the ecosystem-wide risks that sank PEP 690.
Security News
Socket CEO Feross Aboukhadijeh discusses the recent npm supply chain attacks on PodRocket, covering novel attack vectors and how developers can protect themselves.
Security News
Maintainers back GitHub’s npm security overhaul but raise concerns about CI/CD workflows, enterprise support, and token management.