about | summary | refs | log | tree | commit | diff
path: root/parser
diff options
context:
space:
mode:
author: Thomas Voss <mail@thomasvoss.com> 2023-09-02 18:49:53 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2023-09-08 23:16:19 +0200
commit: 643623dbecdc1ccb6f3ac77e4ebabdc6ca1d8d06 (patch)
tree: a9d6b50ad7263e792bc276f765ada74a5661a8b1 /parser
Genesis commit
Diffstat (limited to 'parser')
-rw-r--r-- parser/errors.go 19
-rw-r--r-- parser/parser.go 321
-rw-r--r-- parser/reader.go 94
3 files changed, 434 insertions, 0 deletions
diff --git a/parser/errors.go b/parser/errors.go
new file mode 100644
index 0000000..f6369be
--- /dev/null
+++ b/parser/errors.go
@@ -0,0 +1,19 @@
+package parser
+
+import "fmt"
+
+// invalidSyntax is the error returned when the parser reads something
+// other than what the grammar allows at the current position.
+type invalidSyntax struct {
+	pos      position // reader position at the time the error was raised
+	expected string   // description of what the parser wanted to see
+	found    string   // description of what was actually read
+}
+
+// Error renders the position together with the expected and found
+// descriptions as a single human-readable message.
+func (e invalidSyntax) Error() string {
+	where := e.pos.String()
+	return fmt.Sprintf(
+		"Syntax error near %s; expected %s but found %s",
+		where, e.expected, e.found,
+	)
+}
+
+// eof is the error reported when the input ends while a node body is
+// still open.
+type eof struct{}
+
+// Error returns a fixed message hinting at an unbalanced closing brace.
+func (eof) Error() string {
+	const msg = "Hit end-of-file while parsing. You’re probably missing a closing brace (‘}’) somewhere"
+	return msg
+}
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..5b4d65c
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,321 @@
+package parser
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+ "unicode"
+)
+
+// nodeType identifies which kind of node an AstNode represents.
+type nodeType uint
+
+const (
+ // Normal marks a named node; parseNode assigns it after reading a
+ // node name.
+ Normal nodeType = iota
+ // Tagless marks the wrapper node that parseText returns for a text
+ // run introduced by ‘-’.
+ Tagless
+ // Text marks a run of literal text stored as a child of a Tagless
+ // node.
+ Text
+)
+
+// Attr is a single key/value attribute attached to a node. The ‘.name’
+// and ‘#name’ shorthands are stored under the keys "class" and "id"
+// respectively; an attribute written without ‘=’ has an empty Value.
+type Attr struct {
+ Key string
+ Value string
+}
+
+// AstNode is one node of the parsed tree.
+type AstNode struct {
+ // Type says how the remaining fields are to be interpreted.
+ Type nodeType
+ // Text is the node name for Normal nodes and the literal text for
+ // Text nodes; parseText leaves it empty on Tagless nodes.
+ Text string
+ // Attrs holds the attributes parsed between the node name and ‘{’.
+ Attrs []Attr
+ // Children holds the nested nodes in source order.
+ Children []AstNode
+}
+
+// ParseFile wraps file in a buffered reader and parses one node from it,
+// returning that node as the root of the tree. It returns as soon as the
+// node has been parsed; whatever follows it in the file is not examined.
+func ParseFile(file *os.File) (AstNode, error) {
+	p := &reader{r: bufio.NewReader(file)}
+	return p.parseNode()
+}
+
+// parseNode parses one node starting at the next non-space rune. A
+// leading ‘-’ hands control to parseText; anything else is read as a
+// node name, followed by its attributes and a ‘{ … }’ body whose
+// children are parsed recursively. Running out of input inside the body
+// is reported as eof{}; every other error is passed through unchanged.
+func (reader *reader) parseNode() (AstNode, error) {
+	if err := reader.skipSpaces(); err != nil {
+		return AstNode{}, err
+	}
+
+	first, err := reader.peekRune()
+	if err != nil {
+		return AstNode{}, err
+	}
+	if first == '-' {
+		return reader.parseText()
+	}
+
+	name, err := reader.parseNodeName()
+	if err != nil {
+		return AstNode{}, err
+	}
+	attrs, err := reader.parseAttrs()
+	if err != nil {
+		return AstNode{}, err
+	}
+	node := AstNode{Type: Normal, Text: name, Attrs: attrs}
+
+	// parseAttrs only returns successfully once it has peeked the ‘{’
+	// token, so this read consumes it.
+	if _, err := reader.readRune(); err != nil {
+		return AstNode{}, err
+	}
+
+	for {
+		if err := reader.skipSpaces(); err != nil {
+			return AstNode{}, err
+		}
+
+		next, err := reader.peekRune()
+		switch {
+		case err == io.EOF:
+			return AstNode{}, eof{}
+		case err != nil:
+			return AstNode{}, err
+		}
+		if next == '}' {
+			break
+		}
+
+		child, err := reader.parseNode()
+		if err != nil {
+			return AstNode{}, err
+		}
+		node.Children = append(node.Children, child)
+	}
+
+	// The loop above only exits after peeking the ‘}’ token; consume it.
+	if _, err := reader.readRune(); err != nil {
+		return AstNode{}, err
+	}
+
+	return node, nil
+}
+
+// parseNodeName skips leading whitespace and reads a name: one rune
+// accepted by validNameStartChar followed by any number of runes
+// accepted by validNameChar. The first rune that is not part of the name
+// is pushed back onto the reader. An invalidSyntax error is returned if
+// the first rune cannot start a name.
+func (reader *reader) parseNodeName() (string, error) {
+	if err := reader.skipSpaces(); err != nil {
+		return "", err
+	}
+
+	c, err := reader.readRune()
+	if err != nil {
+		return "", err
+	}
+	if !validNameStartChar(c) {
+		return "", invalidSyntax{
+			pos:      reader.pos,
+			expected: "node name",
+			found:    fmt.Sprintf("invalid character ‘%c’", c),
+		}
+	}
+
+	var name strings.Builder
+	for validNameChar(c) {
+		name.WriteRune(c)
+		if c, err = reader.readRune(); err != nil {
+			return "", err
+		}
+	}
+
+	// c now holds the rune that terminated the name; give it back.
+	if err := reader.unreadRune(); err != nil {
+		return "", err
+	}
+	return name.String(), nil
+}
+
+func (reader *reader) parseText() (AstNode, error) {
+ if _, err := reader.readRune(); err != nil {
+ return AstNode{}, err
+ }
+
+ sb := strings.Builder{}
+ node := AstNode{Type: Tagless}
+
+ loop: for {
+ r, err := reader.readRune()
+ if err != nil {
+ return AstNode{}, err
+ }
+ switch r {
+ case '}':
+ if err := reader.unreadRune(); err != nil {
+ return AstNode{}, err
+ }
+ break loop
+ case '@':
+ node.Children = append(node.Children, AstNode{
+ Type: Text,
+ Text: sb.String(),
+ })
+ sb = strings.Builder{}
+
+ n, err := reader.parseNode()
+ if err != nil {
+ return AstNode{}, err
+ }
+ node.Children = append(node.Children, n)
+ case '\\':
+ r, err = reader.readRune()
+ if err != nil {
+ return AstNode{}, err
+ }
+ if r != '\\' && r != '@' && r != '}' {
+ return AstNode{}, invalidSyntax{
+ pos: reader.pos,
+ expected: "valid escape sequence (‘\\\\’, ‘\\@’, or ‘\\}’)",
+ found: fmt.Sprintf("‘\\%c’", r),
+ }
+ }
+ fallthrough
+ default:
+ sb.WriteRune(r)
+ }
+ }
+
+ node.Children = append(node.Children, AstNode{
+ Type: Text,
+ Text: sb.String(),
+ })
+ return node, nil
+}
+
+func (reader *reader) parseAttrs() ([]Attr, error) {
+ attrs := make([]Attr, 0, 2)
+
+ loop: for {
+ if err := reader.skipSpaces(); err != nil {
+ return nil, err
+ }
+ r, err := reader.peekRune()
+ if err != nil {
+ return nil, err
+ }
+
+ attr := Attr{}
+ switch r {
+ case '{':
+ break loop
+ case '.':
+ fallthrough
+ case '#':
+ sym := r
+
+ // Skip ‘sym’
+ if _, err := reader.readRune(); err != nil {
+ return nil, err
+ }
+
+ if s, err := reader.parseNodeName(); err != nil {
+ return nil, err
+ } else {
+ attr.Value = s
+ if sym == '.' {
+ attr.Key = "class"
+ } else {
+ attr.Key = "id"
+ }
+ }
+ default:
+ if unicode.IsSpace(r) {
+ if err := reader.skipSpaces(); err != nil {
+ return nil, err
+ }
+ continue
+ }
+
+ if s, err := reader.parseNodeName(); err != nil {
+ return nil, err
+ } else {
+ attr.Key = s
+ }
+
+ if r, err := reader.readNonSpaceRune(); err != nil {
+ return nil, err
+ } else if r != '=' {
+ reader.unreadRune()
+ break
+ }
+
+ if s, err := reader.parseString(); err != nil {
+ return nil, err
+ } else {
+ attr.Value = s
+ }
+ }
+ attrs = append(attrs, attr)
+ }
+
+ return attrs, nil
+}
+
+func (reader *reader) parseString() (string, error) {
+ sb := strings.Builder{}
+
+ if r, err := reader.readNonSpaceRune(); err != nil {
+ return "", err
+ } else if r != '"' {
+ return "", invalidSyntax{
+ pos: reader.pos,
+ expected: "double-quoted string",
+ found: fmt.Sprintf("‘%c’", r),
+ }
+ }
+
+ for {
+ r, err := reader.readRune()
+ if err != nil {
+ return "", err
+ }
+
+ switch r {
+ case '"':
+ return sb.String(), nil
+ case '\\':
+ r, err := reader.readRune()
+ if err != nil {
+ return "", err
+ }
+
+ if r != '\\' && r != '"' {
+ return "", invalidSyntax{
+ pos: reader.pos,
+ expected: "valid escape sequence (‘\\\\’ or ‘\\\"’)",
+ found: fmt.Sprintf("‘\\%c’", r),
+ }
+ }
+
+ sb.WriteRune(r)
+ default:
+ sb.WriteRune(r)
+ }
+ }
+}
+
+// validNameStartChar reports whether r may appear as the first rune of
+// a name: ‘:’, ‘_’, an ASCII letter, or a code point inside one of the
+// ranges listed below.
+func validNameStartChar(r rune) bool {
+	switch {
+	case r == ':', r == '_':
+	case 'A' <= r && r <= 'Z':
+	case 'a' <= r && r <= 'z':
+	case 0x000C0 <= r && r <= 0x000D6:
+	case 0x000D8 <= r && r <= 0x000F6:
+	case 0x000F8 <= r && r <= 0x002FF:
+	case 0x00370 <= r && r <= 0x0037D:
+	case 0x0037F <= r && r <= 0x01FFF:
+	case 0x0200C <= r && r <= 0x0200D:
+	case 0x02070 <= r && r <= 0x0218F:
+	case 0x02C00 <= r && r <= 0x02FEF:
+	case 0x03001 <= r && r <= 0x0D7FF:
+	case 0x0F900 <= r && r <= 0x0FDCF:
+	case 0x0FDF0 <= r && r <= 0x0FFFD:
+	case 0x10000 <= r && r <= 0xEFFFF:
+	default:
+		return false
+	}
+	return true
+}
+
+// validNameChar reports whether r may appear after the first rune of a
+// name: everything validNameStartChar accepts, plus ‘-’, ‘.’, ‘·’, the
+// ASCII digits and two further code-point ranges.
+func validNameChar(r rune) bool {
+	if validNameStartChar(r) {
+		return true
+	}
+
+	switch {
+	case r == '-', r == '.', r == '·':
+		return true
+	case '0' <= r && r <= '9':
+		return true
+	case 0x0300 <= r && r <= 0x036F:
+		return true
+	case 0x203F <= r && r <= 0x2040:
+		return true
+	}
+	return false
+}
diff --git a/parser/reader.go b/parser/reader.go
new file mode 100644
index 0000000..22a8e6f
--- /dev/null
+++ b/parser/reader.go
@@ -0,0 +1,94 @@
+package parser
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "unicode"
+ "unicode/utf8"
+)
+
+// position tracks where in the input the reader currently is.
+type position struct {
+	col     uint // number of runes read so far on the current line
+	row     uint // zero-based line index
+	prevCol uint // col of the previous line, saved when a newline is read
+}
+
+// String formats the position as "line:column", with the line number
+// shown one-based.
+func (p position) String() string {
+	line, column := p.row+1, p.col
+	return fmt.Sprintf("%d:%d", line, column)
+}
+
+// reader wraps a buffered input source and keeps track of the current
+// position within it for use in error messages.
+type reader struct {
+ // r is the underlying buffered input.
+ r *bufio.Reader
+ // pos is updated by readRune and unreadRune as runes are consumed
+ // and pushed back.
+ pos position
+}
+
+func (reader *reader) peekRune() (rune, error) {
+ bytes := make([]byte, 0, 4)
+ var err error
+
+ // Peeking the next rune is annoying. We want to get the next rune
+ // which could be the next 1–4 bytes. Normally we can just call
+ // reader.r.Peek(4) but that doesn’t work here as the last rune in a
+ // file could be a 1–3 byte rune, so we would fail with an EOF error.
+ for i := 4; i > 0; i-- {
+ if bytes, err = reader.r.Peek(i); err == io.EOF {
+ continue
+ } else if err != nil {
+ return 0, err
+ } else {
+ rune, _ := utf8.DecodeRune(bytes)
+ return rune, nil
+ }
+ }
+
+ return 0, io.EOF
+}
+
+// unreadRune pushes the most recently read rune back onto the input and
+// rewinds the tracked position by one rune. If the underlying reader
+// refuses the unread, the error is returned and the position is left
+// untouched.
+func (reader *reader) unreadRune() error {
+	// Unread first: adjusting the position before knowing the unread
+	// succeeded would corrupt it on failure, and at position 0:0 the
+	// row decrement would wrap the unsigned counter around.
+	if err := reader.r.UnreadRune(); err != nil {
+		return err
+	}
+
+	if reader.pos.col == 0 {
+		// Column zero directly after a successful read means the rune
+		// was a newline; step back to the end of the previous line.
+		reader.pos.col = reader.pos.prevCol
+		reader.pos.row--
+	} else {
+		reader.pos.col--
+	}
+	return nil
+}
+
+// readRune consumes and returns the next rune, advancing the tracked
+// position: a newline moves to column zero of the next row (remembering
+// the old column so that the newline can be unread), and any other rune
+// advances the column by one. On a read error, including io.EOF, the
+// position is left unchanged and the zero rune is returned.
+func (reader *reader) readRune() (rune, error) {
+	r, _, err := reader.r.ReadRune()
+	if err != nil {
+		// Nothing was consumed, so the position must not move.
+		return 0, err
+	}
+
+	if r == '\n' {
+		reader.pos.prevCol = reader.pos.col
+		reader.pos.col = 0
+		reader.pos.row++
+	} else {
+		reader.pos.col++
+	}
+	return r, nil
+}
+
+// readNonSpaceRune skips any whitespace and then consumes and returns
+// the next rune. On error the zero rune is returned.
+func (reader *reader) readNonSpaceRune() (rune, error) {
+	if err := reader.skipSpaces(); err != nil {
+		return 0, err
+	}
+
+	r, err := reader.readRune()
+	if err != nil {
+		return 0, err
+	}
+	return r, nil
+}
+
+// skipSpaces consumes runes for as long as unicode.IsSpace accepts
+// them, leaving the first non-space rune unread. Reaching the end of
+// the input is not treated as an error here: nil is returned and the
+// caller finds out about EOF on its next read or peek.
+func (reader *reader) skipSpaces() error {
+	for {
+		c, err := reader.readRune()
+		switch {
+		case err == io.EOF:
+			return nil
+		case err != nil:
+			return err
+		case !unicode.IsSpace(c):
+			return reader.unreadRune()
+		}
+	}
+}