package grammar

import "code.betamike.com/mediocregopher/ginger/gg/grammar"

Package grammar is used for parsing a stream of runes according to a set of grammatical rules. This package only supports context-free grammars.

Example

Code:play 

package main

import (
	"bytes"
	"fmt"
	"strconv"
	"strings"

	"code.betamike.com/mediocregopher/ginger/gg/grammar"
	"golang.org/x/exp/slices"
)

/*
This example demonstrates how to describe the following EBNF using the grammar
package:

```
<digit>           ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
<positive-number> ::= <digit>+
<negative-number> ::= "-" <positive-number>
<number>          ::= <negative-number> | <positive-number>

<list-el-tail> ::= <element> <list-tail>
<list-tail>    ::= ")" | "," <list-el-tail>
<list-head>    ::= ")" | <list-el-tail>
<list>         ::= "(" <list-head>

<element> ::= <number> | <list>
```
*/

// Element represents an element of a list, which can be either a number or a
// sub-list.
type Element struct {
	Number int64
	List   []Element
}

func (e Element) String() string {
	if e.List != nil {
		listElStrs := make([]string, len(e.List))
		for i := range e.List {
			listElStrs[i] = e.List[i].String()
		}
		return fmt.Sprintf("(%s)", strings.Join(listElStrs, ","))
	}

	return fmt.Sprint(e.Number)
}

var (
	digit = grammar.RuneFunc(
		"digit", func(r rune) bool { return '0' <= r && r <= '9' },
	)

	positiveNumber = grammar.StringFromRunes(grammar.OneOrMore(digit))

	negativeNumber = grammar.Reduction(
		grammar.Rune('-'),
		positiveNumber,
		func(
			neg grammar.Located[rune], posNum grammar.Located[string],
		) grammar.Located[string] {
			return grammar.Locate(neg.Location, string(neg.Value)+posNum.Value)
		},
	)

	number = grammar.Named(
		"number",
		grammar.Mapping(
			grammar.FirstOf(negativeNumber, positiveNumber),
			func(str grammar.Located[string]) Element {
				i, err := strconv.ParseInt(str.Value, 10, 64)
				if err != nil {
					panic(fmt.Errorf("parsing %q as int: %w", str, err))
				}
				return Element{Number: i}
			},
		),
	)

	// Because the list/element definitions are recursive it requires using
	// SymbolPtrs, which is easier to do via a global initialization function
	// like this.
	list = func() grammar.Symbol[Element] {

		type listState []Element

		var (
			listTail = new(grammar.SymbolPtr[listState])
			list     = new(grammar.SymbolPtr[Element])
			element  = new(grammar.SymbolPtr[Element])

			// Right parenthesis indicates the end of a list, at which point we
			// can initialize the state which gets returned down the stack.
			listTerm = grammar.Mapping(
				grammar.Rune(')'),
				func(grammar.Located[rune]) listState { return listState{} },
			)

			listElTail = grammar.Reduction[Element, listState, listState](
				element,
				listTail,
				func(el Element, ls listState) listState {
					ls = append(ls, el)
					return ls
				},
			)

			listHead = grammar.FirstOf(listTerm, listElTail)
		)

		listTail.Symbol = grammar.FirstOf(
			listTerm,
			grammar.Prefixed(grammar.Rune(','), listElTail),
		)

		list.Symbol = grammar.Named(
			"list",
			grammar.Reduction[grammar.Located[rune], listState, Element](
				grammar.Rune('('),
				listHead,
				func(_ grammar.Located[rune], ls listState) Element {
					slices.Reverse(ls)
					return Element{List: []Element(ls)}
				},
			),
		)

		element.Symbol = grammar.FirstOf[Element](number, list)

		return list
	}()
)

func main() {
	r := grammar.NewReader(bytes.NewBufferString(
		`()` + `(1,(2,-3),4)` + `(ERROR`,
	))

	l1, err := list.Decode(r)
	fmt.Println(l1, err)

	l2, err := list.Decode(r)
	fmt.Println(l2, err)

	_, err = list.Decode(r)
	fmt.Println(err)

}

Output:

() <nil>
(1,(2,-3),4) <nil>
1:16: expected ')' or number or list

Index

Examples

Variables

var ErrNoMatch = errors.New("no match")

ErrNoMatch is used by Symbol's Decode method, see that method's docs for more details.

Types

type Located

type Located[T any] struct {
	Location
	Value T
}

Located wraps a value so that it has a Location attached to it.

func Locate

func Locate[T any](l Location, v T) Located[T]

Locate returns a Located instance combining the given values.

type LocatedError

type LocatedError Located[error]

LocatedError is an error related to a specific point within a stream of runes.

func (LocatedError) Error

func (e LocatedError) Error() string

type Location

type Location struct {
	Row, Col int
}

Location indicates a position in a stream of runes identified by column within newline-separated rows.

type Reader

type Reader interface {

	// ReadRune reads the next Rune off the stream, or returns io.EOF.
	ReadRune() (Located[rune], error)

	// UnreadRune can be used to place a Rune onto an internal buffer, such that
	// the Rune will be the next to be read using ReadRune. If called multiple
	// times then ReadRune will produce the given Runes in LIFO order.
	UnreadRune(Located[rune])

	// NextLocation returns the Location of the next Rune which will be returned
	// with ReadRune.
	NextLocation() Location
}

Reader is used for reading Runes from a stream.

func NewReader

func NewReader(r io.Reader) Reader

NewReader wraps the io.Reader as a Reader. The given Reader should not be read from after this call.

type Stringer

type Stringer struct {
	I fmt.Stringer
	F func() string
	S string
}

Stringer is a convenience tool for working with fmt.Stringer. Exactly one of the fields must be set, and will be used to implement the fmt.Stringer interface.

func (Stringer) String

func (s Stringer) String() string

type Symbol

type Symbol[T any] interface {
	fmt.Stringer // Used when generating errors related to this Symbol, e.g. "number"

	// Decode reads and parses a value represented by this Symbol off the
	// Reader.
	//
	// This may return ErrNoMatch to indicate that the upcoming data on the
	// Reader is rejected by this Symbol. In this case the Symbol should leave
	// the Reader in the same state it was passed.
	Decode(Reader) (T, error)
}

Symbol represents a symbol in the grammar. A Symbol is expected to be stateless, and is usually constructed from other Symbols using functions in this package.

func Discard

func Discard[T any](sym Symbol[T]) Symbol[struct{}]

Discard matches if the given Symbol does, but discards the value it produces, producing an empty value instead.

func FirstOf

func FirstOf[T any](syms ...Symbol[T]) Symbol[T]

FirstOf matches and produces the value for the first Symbol in the list which matches. FirstOf does not match if none of the given Symbols match.

func Mapping

func Mapping[Ta, Tb any](
	sym Symbol[Ta], fn func(Ta) Tb,
) Symbol[Tb]

Mapping produces a value of type Tb by decoding a value from the given Symbol and passing it through the given mapping function. If the given Symbol doesn't match then neither does Map.

func Named

func Named[T any](name string, sym Symbol[T]) Symbol[T]

Named wraps the given Symbol such that its String method returns the given name.

func OneOrMore

func OneOrMore[T any](sym Symbol[T]) Symbol[[]T]

OneOrMore will produce as many of the given Symbol's value as can be found sequentially, up until a non-matching value is encountered. If no matches are found then OneOrMore does not match.

func PrefixDiscarded

func PrefixDiscarded[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb]

PrefixDiscarded is similar to Prefixed, except that if sym does not match then PrefixDiscarded does not match, whereas Prefixed produces a LocatedError in that case.

NOTE PrefixDiscarded does not fully honor the contract of Symbol. If prefixSym matches, but sym does not, then only sym will restore Reader to its prior state; prefixSym cannot return whatever data it read back onto the Reader. Therefore ErrNoMatch can be returned without Reader being fully back in its original state. In practice this isn't a big deal, given the common use-cases of PrefixDiscarded, but it may prove tricky.

func Prefixed

func Prefixed[Ta, Tb any](prefixSym Symbol[Ta], sym Symbol[Tb]) Symbol[Tb]

Prefixed matches on prefixSym, discards its value, then produces the value produced by sym.

If prefixSym does not match then Prefixed does not match. If prefixSym matches but sym does not also match then Prefixed produces a LocatedError.

func Reduction

func Reduction[Ta, Tb, Tc any](
	symA Symbol[Ta],
	symB Symbol[Tb],
	fn func(Ta, Tb) Tc,
) Symbol[Tc]

Reduction produces a value of type Tc by first reading a value from symA, then symB, and then running those through the given function.

If symA does not match then Reduction does not match. If symA matches but symB does not then also match then Reduction produces a LocatedError.

func Rune

func Rune(r rune) Symbol[Located[rune]]

Rune matches and produces the given rune.

func RuneFunc

func RuneFunc(name string, fn func(rune) bool) Symbol[Located[rune]]

RuneFunc matches and produces any rune for which the given function returns true.

func StringFromRunes

func StringFromRunes(sym Symbol[[]Located[rune]]) Symbol[Located[string]]

StringFromRunes produces a string from the slice of runes produced by the given Symbol. The slice must not be empty. StringFromRunes does not match if the given Symbol does not match.

func Suffixed

func Suffixed[Ta, Tb any](sym Symbol[Ta], suffixSym Symbol[Tb]) Symbol[Ta]

Suffixed matchs on sym and then suffixSym, returning the value produced by sym and discarding the one produced by suffixSym.

If sym does not match then Suffixed does not match. If sym matches but suffixSym does not also match then Suffixed produces a LocatedError.

func ZeroOrMore

func ZeroOrMore[T any](sym Symbol[T]) Symbol[[]T]

ZeroOrMore will produce as many of the given Symbol's value as can be found sequentially, up until a non-matching value is encountered. If no matches are found then an empty slice is produced.

type SymbolPtr

type SymbolPtr[T any] struct {
	Symbol[T]
}

SymbolPtr wraps a Symbol in a such a way as to make lazily initializing a Symbol variable possible. This allows for recursion amongst different Symbols.

Example:

a := new(SymbolPtr)
b := new(SymbolPtr)
a.Symbol = FirstOf(Rune('a'), b)
b.Symbol = FirstOf(Rune('b'), a)

Source Files

grammar.go location.go reader.go symbol.go

Version
v0.0.0-20231102164304-22e14bbb3fec (latest)
Published
Nov 2, 2023
Platform
linux/amd64
Imports
5 packages
Last checked
3 months ago

Tools for package owners.