diff options
author | Thomas Voss <mail@thomasvoss.com> | 2023-09-02 18:49:53 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2023-09-08 23:16:19 +0200 |
commit | 643623dbecdc1ccb6f3ac77e4ebabdc6ca1d8d06 (patch) | |
tree | a9d6b50ad7263e792bc276f765ada74a5661a8b1 /parser |
Genesis commit
Diffstat (limited to 'parser')
-rw-r--r-- | parser/errors.go | 19 | ||||
-rw-r--r-- | parser/parser.go | 321 | ||||
-rw-r--r-- | parser/reader.go | 94 |
3 files changed, 434 insertions, 0 deletions
diff --git a/parser/errors.go b/parser/errors.go new file mode 100644 index 0000000..f6369be --- /dev/null +++ b/parser/errors.go @@ -0,0 +1,19 @@ +package parser + +import "fmt" + +type invalidSyntax struct { + pos position + expected string + found string +} + +func (e invalidSyntax) Error() string { + return fmt.Sprintf("Syntax error near %v; expected %s but found %s", e.pos, e.expected, e.found) +} + +type eof struct{} + +func (e eof) Error() string { + return "Hit end-of-file while parsing. You’re probably missing a closing brace (‘}’) somewhere" +} diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..5b4d65c --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,321 @@ +package parser + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" + "unicode" +) + +type nodeType uint + +const ( + Normal nodeType = iota + Tagless + Text +) + +type Attr struct { + Key string + Value string +} + +type AstNode struct { + Type nodeType + Text string + Attrs []Attr + Children []AstNode +} + +func ParseFile(file *os.File) (AstNode, error) { + r := reader{r: bufio.NewReader(file)} + return r.parseNode() +} + +func (reader *reader) parseNode() (AstNode, error) { + if err := reader.skipSpaces(); err != nil { + return AstNode{}, err + } + + if r, err := reader.peekRune(); err != nil { + return AstNode{}, err + } else if r == '-' { + return reader.parseText() + } + + node := AstNode{} + if name, err := reader.parseNodeName(); err != nil { + return AstNode{}, err + } else { + node.Type = Normal + node.Text = name + } + + if attrs, err := reader.parseAttrs(); err != nil { + return AstNode{}, err + } else { + node.Attrs = attrs + } + + // The above call to reader.parseAttrs() guarantees that we have the ‘{’ + // token. + if _, err := reader.readRune(); err != nil { + return AstNode{}, err + } + + loop: for { + if err := reader.skipSpaces(); err != nil { + return AstNode{}, err + } + + if r, err := reader.peekRune(); err == io.EOF { + return AstNode{}, eof{} + } else if err != nil { + return AstNode{}, err + } else if r == '}' { + break loop + } + + if n, err := reader.parseNode(); err != nil { + return AstNode{}, err + } else { + node.Children = append(node.Children, n) + } + } + + // The above loop guarantees that we have the ‘}’ token. + if _, err := reader.readRune(); err != nil { + return AstNode{}, err + } + + return node, nil +} + +func (reader *reader) parseNodeName() (string, error) { + var r rune + var err error + + if err = reader.skipSpaces(); err != nil { + return "", err + } + + sb := strings.Builder{} + + if r, err = reader.readRune(); err != nil { + return "", err + } else if !validNameStartChar(r) { + return "", invalidSyntax{ + pos: reader.pos, + expected: "node name", + found: fmt.Sprintf("invalid character ‘%c’", r), + } + } + + for validNameChar(r) { + sb.WriteRune(r) + if r, err = reader.readRune(); err != nil { + return "", err + } + } + + if err = reader.unreadRune(); err != nil { + return "", err + } + return sb.String(), nil +} + +func (reader *reader) parseText() (AstNode, error) { + if _, err := reader.readRune(); err != nil { + return AstNode{}, err + } + + sb := strings.Builder{} + node := AstNode{Type: Tagless} + + loop: for { + r, err := reader.readRune() + if err != nil { + return AstNode{}, err + } + switch r { + case '}': + if err := reader.unreadRune(); err != nil { + return AstNode{}, err + } + break loop + case '@': + node.Children = append(node.Children, AstNode{ + Type: Text, + Text: sb.String(), + }) + sb = strings.Builder{} + + n, err := reader.parseNode() + if err != nil { + return AstNode{}, err + } + node.Children = append(node.Children, n) + case '\\': + r, err = reader.readRune() + if err != nil { + return AstNode{}, err + } + if r != '\\' && r != '@' && r != '}' { + return AstNode{}, invalidSyntax{ + pos: reader.pos, + expected: "valid escape sequence (‘\\\\’, ‘\\@’, or ‘\\}’)", + found: fmt.Sprintf("‘\\%c’", r), + } + } + fallthrough + default: + sb.WriteRune(r) + } + } + + node.Children = append(node.Children, AstNode{ + Type: Text, + Text: sb.String(), + }) + return node, nil +} + +func (reader *reader) parseAttrs() ([]Attr, error) { + attrs := make([]Attr, 0, 2) + + loop: for { + if err := reader.skipSpaces(); err != nil { + return nil, err + } + r, err := reader.peekRune() + if err != nil { + return nil, err + } + + attr := Attr{} + switch r { + case '{': + break loop + case '.': + fallthrough + case '#': + sym := r + + // Skip ‘sym’ + if _, err := reader.readRune(); err != nil { + return nil, err + } + + if s, err := reader.parseNodeName(); err != nil { + return nil, err + } else { + attr.Value = s + if sym == '.' { + attr.Key = "class" + } else { + attr.Key = "id" + } + } + default: + if unicode.IsSpace(r) { + if err := reader.skipSpaces(); err != nil { + return nil, err + } + continue + } + + if s, err := reader.parseNodeName(); err != nil { + return nil, err + } else { + attr.Key = s + } + + if r, err := reader.readNonSpaceRune(); err != nil { + return nil, err + } else if r != '=' { + reader.unreadRune() + break + } + + if s, err := reader.parseString(); err != nil { + return nil, err + } else { + attr.Value = s + } + } + attrs = append(attrs, attr) + } + + return attrs, nil +} + +func (reader *reader) parseString() (string, error) { + sb := strings.Builder{} + + if r, err := reader.readNonSpaceRune(); err != nil { + return "", err + } else if r != '"' { + return "", invalidSyntax{ + pos: reader.pos, + expected: "double-quoted string", + found: fmt.Sprintf("‘%c’", r), + } + } + + for { + r, err := reader.readRune() + if err != nil { + return "", err + } + + switch r { + case '"': + return sb.String(), nil + case '\\': + r, err := reader.readRune() + if err != nil { + return "", err + } + + if r != '\\' && r != '"' { + return "", invalidSyntax{ + pos: reader.pos, + expected: "valid escape sequence (‘\\\\’ or ‘\\\"’)", + found: fmt.Sprintf("‘\\%c’", r), + } + } + + sb.WriteRune(r) + default: + sb.WriteRune(r) + } + } +} + +func validNameStartChar(r rune) bool { + return r == ':' || r == '_' || + (r >= 'A' && r <= 'Z') || + (r >= 'a' && r <= 'z') || + (r >= 0x000C0 && r <= 0x000D6) || + (r >= 0x000D8 && r <= 0x000F6) || + (r >= 0x000F8 && r <= 0x002FF) || + (r >= 0x00370 && r <= 0x0037D) || + (r >= 0x0037F && r <= 0x01FFF) || + (r >= 0x0200C && r <= 0x0200D) || + (r >= 0x02070 && r <= 0x0218F) || + (r >= 0x02C00 && r <= 0x02FEF) || + (r >= 0x03001 && r <= 0x0D7FF) || + (r >= 0x0F900 && r <= 0x0FDCF) || + (r >= 0x0FDF0 && r <= 0x0FFFD) || + (r >= 0x10000 && r <= 0xEFFFF) +} + +func validNameChar(r rune) bool { + return validNameStartChar(r) || + r == '-' || r == '.' || r == '·' || + (r >= '0' && r <= '9') || + (r >= 0x0300 && r <= 0x036F) || + (r >= 0x203F && r <= 0x2040) +} diff --git a/parser/reader.go b/parser/reader.go new file mode 100644 index 0000000..22a8e6f --- /dev/null +++ b/parser/reader.go @@ -0,0 +1,94 @@ +package parser + +import ( + "bufio" + "fmt" + "io" + "unicode" + "unicode/utf8" +) + +type position struct { + col uint + row uint + prevCol uint +} + +func (p position) String() string { + return fmt.Sprintf("%d:%d", p.row+1, p.col) +} + +type reader struct { + r *bufio.Reader + pos position +} + +func (reader *reader) peekRune() (rune, error) { + bytes := make([]byte, 0, 4) + var err error + + // Peeking the next rune is annoying. We want to get the next rune + // which could be the next 1–4 bytes. Normally we can just call + // reader.r.Peek(4) but that doesn’t work here as the last rune in a + // file could be a 1–3 byte rune, so we would fail with an EOF error. + for i := 4; i > 0; i-- { + if bytes, err = reader.r.Peek(i); err == io.EOF { + continue + } else if err != nil { + return 0, err + } else { + rune, _ := utf8.DecodeRune(bytes) + return rune, nil + } + } + + return 0, io.EOF +} + +func (reader *reader) unreadRune() error { + if reader.pos.col == 0 { + reader.pos.col = reader.pos.prevCol + reader.pos.row-- + } else { + reader.pos.col-- + } + + return reader.r.UnreadRune() +} + +func (reader *reader) readRune() (rune, error) { + rune, _, err := reader.r.ReadRune() + if rune == '\n' { + reader.pos.prevCol = reader.pos.col + reader.pos.col = 0 + reader.pos.row++ + } else { + reader.pos.col++ + } + return rune, err +} + +func (reader *reader) readNonSpaceRune() (rune, error) { + if err := reader.skipSpaces(); err != nil { + return 0, err + } + + if r, err := reader.readRune(); err != nil { + return 0, err + } else { + return r, nil + } +} + +func (reader *reader) skipSpaces() error { + for { + if rune, err := reader.readRune(); err != nil { + if err == io.EOF { + return nil + } + return err + } else if !unicode.IsSpace(rune) { + return reader.unreadRune() + } + } +} |