ProductPromotion
Logo

Go.Lang

made by https://0x3d.site

GitHub - khezen/avro: Apache AVRO for go
Apache AVRO for go. Contribute to khezen/avro development by creating an account on GitHub.
Visit Site

GitHub - khezen/avro: Apache AVRO for go

GitHub - khezen/avro: Apache AVRO for go

avro

Build Status codecov Go Report Card

The purpose of this package is to facilitate use of AVRO with go strong typing.

Features

github.com/khezen/avro

GoDoc

github.com/khezen/avro/sqlavro

GoDoc

github.com/khezen/avro/redshiftavro

GoDoc

What is AVRO

Apache AVRO is a data serialization system which relies on JSON schemas.

It provides:

  • Rich data structures
  • A compact, fast, binary data format
  • A container file, to store persistent data
  • Remote procedure call (RPC)

AVRO binary encoded data comes together with its schema and therefore is fully self-describing.

When AVRO data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small.

When AVRO data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present.

Examples

Schema Marshal/Unmarshal

package main

import (
  "encoding/json"
  "fmt"

  "github.com/khezen/avro"
)

func main() {
  schemaBytes := []byte(
    `{
      "type": "record",
      "namespace": "test",
      "name": "LongList",
      "aliases": [
        "LinkedLongs"
      ],
      "doc": "linked list of 64 bits integers",
      "fields": [
        {
          "name": "value",
          "type": "long"
        },
        {
          "name": "next",
          "type": [
            "null",
            "LongList"
          ]
        }
      ]
    }`,
  )

  // Unmarshal JSON  bytes to Schema interface
  var anySchema avro.AnySchema
  err := json.Unmarshal(schemaBytes, &anySchema)
  if err != nil {
    panic(err)
  }
  schema := anySchema.Schema()  
  // Marshal Schema interface to JSON bytes
  schemaBytes, err = json.Marshal(schema)
  if err != nil {
    panic(err)
  }
  fmt.Println(string(schemaBytes))
}
{
    "type": "record",
    "namespace": "test",
    "name": "LongList",
    "aliases": [
        "LinkedLongs"
    ],
    "doc": "linked list of 64 bits integers",
    "fields": [
        {
            "name": "value",
            "type": "long"
        },
        {
            "name": "next",
            "type": [
                "null",
                "LongList"
            ]
        }
    ]
}

Convert SQL Table to AVRO Schema

package main
import (
  "database/sql"
  "encoding/json"
  "fmt"

  "github.com/khezen/avro/sqlavro"
)

func main() {
  db, err := sql.Open("mysql", "root@/blog")
  if err != nil {
    panic(err)
  }
  defer db.Close()
  _, err = db.Exec(
    `CREATE TABLE posts(
      ID INT NOT NULL,
      title VARCHAR(128) NOT NULL,
      body LONGBLOB NOT NULL,
      content_type VARCHAR(128) DEFAULT 'text/markdown; charset=UTF-8',
      post_date DATETIME NOT NULL,
      update_date DATETIME,
      reading_time_minutes DECIMAL(3,1),
      PRIMARY KEY(ID)
    )`,
  )
  if err != nil {
    panic(err)
  }
  schemas, err := sqlavro.SQLDatabase2AVRO(db, "blog")
  if err != nil {
    panic(err)
  }
  schemasBytes, err := json.Marshal(schemas)
  if err != nil {
    panic(err)
  }
  fmt.Println(string(schemasBytes))
}
[
    {
        "type": "record",
        "namespace": "blog",
        "name": "posts",
        "fields": [
            {
                "name": "ID",
                "type": "int"
            },
            {
                "name": "title",
                "type": "string"
            },
            {
                "name": "body",
                "type": "bytes"
            },
            {
                "name": "content_type",
                "type": [
                    "string",
                    "null"
                ],
                "default": "text/markdown; charset=UTF-8"
            },
            {
                "name": "post_date",
                "type": {
                    "type": "int",
                    "doc":"datetime",
                    "logicalType": "timestamp"
                }
            },
            {
                "name": "update_date",
                "type": [
                    "null",
                    {
                        "type": "int",
                        "doc":"datetime",
                        "logicalType": "timestamp"
                    }
                ]
            },
            {
                "name": "reading_time_minutes",
                "type": [
                    "null",
                    {
                        "type": "bytes",
                        "logicalType": "decimal",
                        "precision": 3,
                        "scale": 1
                    }
                ]
            }
        ]
    }
]

Query records from SQL into AVRO or CSV binary

package main

import (
	"database/sql"
	"fmt"
	"io/ioutil"
	"time"

	"github.com/khezen/avro"
	"github.com/khezen/avro/sqlavro"
)

func main() {
	db, err := sql.Open("mysql", "root@/blog")
	if err != nil {
		panic(err)
	}
	defer db.Close()
	_, err = db.Exec(
		`CREATE TABLE posts(
			ID INT NOT NULL,
			title VARCHAR(128) NOT NULL,
			body LONGBLOB NOT NULL,
			content_type VARCHAR(128) DEFAULT 'text/markdown; charset=UTF-8',
			post_date DATETIME NOT NULL,
			update_date DATETIME,
			reading_time_minutes DECIMAL(3,1),
			PRIMARY KEY(ID)
		)`,
	)
	if err != nil {
		panic(err)
	}
	_, err = db.Exec(
		// statement
		`INSERT INTO posts(ID,title,body,content_type,post_date,update_date,reading_time_minutes)
		 VALUES (?,?,?,?,?,?,?)`,
		// values
		42,
		"lorem ispum",
		[]byte(`Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.`),
		"text/markdown; charset=UTF-8",
		"2009-04-10 00:00:00",
		"2009-04-10 00:00:00",
		"4.2",
	)
	if err != nil {
		panic(err)
	}
	schema, err := sqlavro.SQLTable2AVRO(db, "blog", "posts")
	if err != nil {
		panic(err)
	}
	limit := 1000
	order := avro.Ascending
	from, err := time.Parse("2006-02-01 15:04:05", "2009-04-10 00:00:00")
	if err != nil {
		panic(err)
	}
	avroBytes, updatedCriteria, err := sqlavro.Query(sqlavro.QueryConfig{
		DB:     db,
		DBName: "blog",
		Schema: schema,
		Limit:  limit,
		Criteria: []sqlavro.Criterion{
			*sqlavro.NewCriterionDateTime("post_date", &from, order),
		},
		Output: "avro",
	})
	if err != nil {
		panic(err)
	}
	err = ioutil.WriteFile("/tmp/blog_posts.avro", avroBytes, 0644)
	if err != nil {
		panic(err)
	}
	fmt.Println(updatedCriteria)
}

Notes

  • When record fields contains aliases, the first alias is used in the query instead of the field name.

Types

Avro Go     SQL
null nil NULL
bytes []byte BLOB,MEDIUMBLOB,LONGBLOB
fixed []byte       CHAR,NCHAR
string,enum string VARCHAR, NVARCHAR,TEXT,TINYTEXT,MEDIUMTEXT,LONGTEXT,ENUM,SET
float float32 FLOAT
double float64 DOUBLE
long int64 BIGINT
int int32   TINYINT,SMALLINT,INT,YEAR
decimal *big.Rat DECIMAL
time int32 TIME
timestamp int32 TIMESTAMP,DATETIME
date time.Time DATE
array []interface{} N/A
map,record map[string]interface{} N/A
union see below    any type nullable

Because of encoding rules for Avro unions, when an union's value is null, a simple Go nil is returned. However when an union's value is non-nil, a Go map[string]interface{} with a single key is returned for the union. The map's single key is the Avro type name and its value is the datum's value.

Produce Redshift create statement from AVRO schema

package main

import (
	"encoding/json"
	"fmt"

	"github.com/khezen/avro"
	"github.com/khezen/avro/redshiftavro"
)

func main() {
	schemaBytes := []byte(`
	{
        "type": "record",
        "namespace": "blog",
        "name": "posts",
        "fields": [
            {
                "name": "ID",
                "type": "int"
            },
            {
                "name": "title",
                "type": "string"
            },
            {
                "name": "body",
                "type": "bytes"
            },
            {
                "name": "content_type",
                "type": [
                    "string",
                    "null"
                ],
                "default": "text/markdown; charset=UTF-8"
            },
            {
                "name": "post_date",
                "type": {
                    "type": "int",
                    "doc":"datetime",
                    "logicalType": "timestamp"
                }
            },
            {
                "name": "update_date",
                "type": [
                    "null",
                    {
                        "type": "int",
                        "doc":"datetime",
                        "logicalType": "timestamp"
                    }
                ]
            },
            {
                "name": "reading_time_minutes",
                "type": [
                    "null",
                    {
                        "type": "bytes",
                        "logicalType": "decimal",
                        "precision": 3,
                        "scale": 1
                    }
                ]
            }
        ]
	}`)
	var anySchema avro.AnySchema
	err := json.Unmarshal(schemaBytes, &anySchema)
	if err != nil {
		panic(err)
	}
	schema := anySchema.Schema().(*avro.RecordSchema)
	cfg := redshiftavro.CreateConfig{
		Schema:      *schema,
		SortKeys:    []string{"post_date", "title"},
		IfNotExists: true,
	}
	statement, err := redshiftavro.CreateTableStatement(cfg)
	if err != nil {
		panic(err)
	}
	fmt.Println(statement)
}
CREATE TABLE IF NOT EXISTS posts(
	ID INTEGER ENCODE LZO NOT NULL,
	title VARCHAR(65535) ENCODE RAW NOT NULL,
	body VARCHAR(65535) ENCODE ZSTD NOT NULL,
	content_type VARCHAR(65535) ENCODE ZSTD NULL,
	post_date TIMESTAMP WITHOUT TIME ZONE ENCODE RAW NOT NULL,
	update_date TIMESTAMP WITHOUT TIME ZONE ENCODE LZO NULL,
	reading_time_minutes DECIMAL(3,1) ENCODE RAW NULL
)
SORTKEY(
	post_date,
	title
)

Issues

If you have any problems or questions, please ask for help through a GitHub issue.

Contributions

Help is always welcome! For example, documentation (like the text you are reading now) can always use improvement. There's always code that can be improved. If you ever see something you think should be fixed, you should own it. If you have no idea what to start on, you can browse the issues labeled with help wanted.

As a potential contributor, your changes and ideas are welcome at any hour of the day or night, weekdays, weekends, and holidays. Please do not ever hesitate to ask a question or send a pull request.

Code of conduct.

Articles
to learn more about the golang concepts.

Resources
which are currently available to browse on.

mail [email protected] to add your project or resources here 🔥.

FAQ's
to know more about the topic.

mail [email protected] to add your project or resources here 🔥.

Queries
or most google FAQ's about GoLang.

mail [email protected] to add more queries here 🔍.

More Sites
to check out once you're finished browsing here.

0x3d
https://www.0x3d.site/
0x3d is designed for aggregating information.
NodeJS
https://nodejs.0x3d.site/
NodeJS Online Directory
Cross Platform
https://cross-platform.0x3d.site/
Cross Platform Online Directory
Open Source
https://open-source.0x3d.site/
Open Source Online Directory
Analytics
https://analytics.0x3d.site/
Analytics Online Directory
JavaScript
https://javascript.0x3d.site/
JavaScript Online Directory
GoLang
https://golang.0x3d.site/
GoLang Online Directory
Python
https://python.0x3d.site/
Python Online Directory
Swift
https://swift.0x3d.site/
Swift Online Directory
Rust
https://rust.0x3d.site/
Rust Online Directory
Scala
https://scala.0x3d.site/
Scala Online Directory
Ruby
https://ruby.0x3d.site/
Ruby Online Directory
Clojure
https://clojure.0x3d.site/
Clojure Online Directory
Elixir
https://elixir.0x3d.site/
Elixir Online Directory
Elm
https://elm.0x3d.site/
Elm Online Directory
Lua
https://lua.0x3d.site/
Lua Online Directory
C Programming
https://c-programming.0x3d.site/
C Programming Online Directory
C++ Programming
https://cpp-programming.0x3d.site/
C++ Programming Online Directory
R Programming
https://r-programming.0x3d.site/
R Programming Online Directory
Perl
https://perl.0x3d.site/
Perl Online Directory
Java
https://java.0x3d.site/
Java Online Directory
Kotlin
https://kotlin.0x3d.site/
Kotlin Online Directory
PHP
https://php.0x3d.site/
PHP Online Directory
React JS
https://react.0x3d.site/
React JS Online Directory
Angular
https://angular.0x3d.site/
Angular JS Online Directory