You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
terraform/vendor/github.com/dimchansky/utfbom
Radek Simko f3357aad45
vendor: Downgrade Azure dependencies
7 years ago
..
.gitignore
.travis.yml vendor: Downgrade Azure dependencies 7 years ago
LICENSE
README.md vendor: Downgrade Azure dependencies 7 years ago
go.mod
utfbom.go vendor: Downgrade Azure dependencies 7 years ago

README.md

utfbom Godoc License Build Status Go Report Card Coverage Status

The package utfbom detects the BOM (Unicode Byte Order Mark) at the start of a stream and removes it when present. It can also return the encoding indicated by the BOM.

Installation

go get -u github.com/dimchansky/utfbom

Example

package main

import (
	"bytes"
	"fmt"
	"io/ioutil"

	"github.com/dimchansky/utfbom"
)

// main runs trySkip on two sample inputs: "hello" prefixed with the UTF-8
// byte-order mark (EF BB BF), and plain "hello" with no mark at all.
func main() {
	samples := [][]byte{
		[]byte("\xEF\xBB\xBFhello"), // BOM-prefixed input
		[]byte("hello"),             // input without a BOM
	}
	for _, sample := range samples {
		trySkip(sample)
	}
}

// trySkip prints byteData and then reads it back twice through utfbom:
// once with SkipOnly, which only skips the BOM, and once with Skip, which
// also reports the encoding it detected. The result of each pass is printed
// to standard output. If either read fails, the error is printed and the
// function returns immediately.
func trySkip(byteData []byte) {
	fmt.Println("Input:", byteData)

	// First pass: skip the BOM without asking which encoding it denotes.
	stripped, err := ioutil.ReadAll(utfbom.SkipOnly(bytes.NewReader(byteData)))
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("ReadAll with BOM skipping", stripped)

	// Second pass: skip the BOM and keep the detected encoding. A fresh
	// reader is needed because the first pass consumed the previous one.
	reader, detected := utfbom.Skip(bytes.NewReader(byteData))

	// Start from the fallback label; it is overwritten only when the
	// detected encoding matches one of the cases below.
	label := "Unknown, no byte-order mark found"
	switch detected {
	case utfbom.UTF8:
		label = "UTF8"
	case utfbom.UTF16BigEndian:
		label = "UTF16 big endian"
	case utfbom.UTF16LittleEndian:
		label = "UTF16 little endian"
	case utfbom.UTF32BigEndian:
		label = "UTF32 big endian"
	case utfbom.UTF32LittleEndian:
		label = "UTF32 little endian"
	}
	fmt.Println("Detected encoding:", label)

	body, err := ioutil.ReadAll(reader)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("ReadAll with BOM detection and skipping", body)
	fmt.Println()
}

Output:

$ go run main.go
Input: [239 187 191 104 101 108 108 111]
ReadAll with BOM skipping [104 101 108 108 111]
Detected encoding: UTF8
ReadAll with BOM detection and skipping [104 101 108 108 111]

Input: [104 101 108 108 111]
ReadAll with BOM skipping [104 101 108 108 111]
Detected encoding: Unknown, no byte-order mark found
ReadAll with BOM detection and skipping [104 101 108 108 111]