diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | LICENSE | 14 | ||||
-rw-r--r-- | formatter/formatter.go | 104 | ||||
-rw-r--r-- | go.mod | 3 | ||||
-rw-r--r-- | main.go | 37 | ||||
-rw-r--r-- | parser/errors.go | 19 | ||||
-rw-r--r-- | parser/parser.go | 321 | ||||
-rw-r--r-- | parser/reader.go | 94 |
8 files changed, 594 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d11f0ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +gsp +test-data @@ -0,0 +1,14 @@ +BSD Zero Clause License + +Copyright © 2023 Thomas Voss + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. diff --git a/formatter/formatter.go b/formatter/formatter.go new file mode 100644 index 0000000..96e19d1 --- /dev/null +++ b/formatter/formatter.go @@ -0,0 +1,104 @@ +package formatter + +import ( + "fmt" + "unicode" + + "git.thomasvoss.com/gsp/parser" +) + +var stringEscapes = map[rune]string{ + '"': """, + '&': "&", + '<': "<", +} + +func PrintHtml(ast parser.AstNode) { + if ast.Type == parser.Text { + fmt.Print(ast.Text) + return + } + + if ast.Type == parser.Normal { + fmt.Printf("<%s", ast.Text) + + // Classes are grouped together with ‘class="…"’, so we need + // special handling. 
+ classes := []string{} + notClasses := []parser.Attr{} + + for _, a := range ast.Attrs { + if a.Key == "class" { + classes = append(classes, a.Value) + } else { + notClasses = append(notClasses, a) + } + } + + if len(classes) > 0 { + fmt.Printf(" class=\"%s", classes[0]) + for _, c := range classes[1:] { + fmt.Printf(" %s", c) + } + fmt.Print("\"") + } + + for _, a := range notClasses { + fmt.Printf(" %s", a.Key) + if a.Value == "" { + break + } + fmt.Print("=\"") + for _, r := range a.Value { + if v, ok := stringEscapes[r]; ok { + fmt.Print(v) + } else { + fmt.Printf("%c", r) + } + } + fmt.Print("\"") + } + + fmt.Print(">") + } + + if len(ast.Children) == 0 { + return + } + + for i, n := range ast.Children { + if n.Type == parser.Text { + if i == 0 { + n.Text = trimLeftSpaces(n.Text) + } + + if i == len(ast.Children)-1 { + n.Text = trimRightSpaces(n.Text) + } + } + + PrintHtml(n) + } + + if ast.Type == parser.Normal { + fmt.Printf("</%s>", ast.Text) + } +} + +func trimLeftSpaces(s string) string { + i := 0 + rs := []rune(s) + for i < len(s) && unicode.IsSpace(rs[i]) { + i++ + } + return string(rs[i:]) +} + +func trimRightSpaces(s string) string { + rs := []rune(s) + i := len(rs) - 1 + for i >= 0 && unicode.IsSpace(rs[i]) { + i-- + } + return string(rs[:i+1]) +} @@ -0,0 +1,3 @@ +module git.thomasvoss.com/gsp + +go 1.21.0 @@ -0,0 +1,37 @@ +package main + +import ( + "fmt" + "os" + + "git.thomasvoss.com/gsp/formatter" + "git.thomasvoss.com/gsp/parser" +) + +func main() { + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "Usage: %s file\n", os.Args[0]) + os.Exit(1) + } + file, err := os.Open(os.Args[1]) + if err != nil { + die(err) + } + defer file.Close() + ast, err := parser.ParseFile(file) + if err != nil { + die(err) + } + + formatter.PrintHtml(ast) + fmt.Print("\n") +} + +func die(strings ...any) { + fmt.Fprint(os.Stderr, os.Args[0]) + for _, s := range strings { + fmt.Fprintf(os.Stderr, ": %v", s) + } + fmt.Fprint(os.Stderr, "\n") + os.Exit(1) +} diff --git 
// parser/errors.go (package parser)

// invalidSyntax is the error returned when the input does not match what
// the parser expects.
type invalidSyntax struct {
	pos      position // reader position when the error was raised
	expected string   // description of what the parser wanted
	found    string   // description of what it got instead
}

// Error implements the error interface.
func (e invalidSyntax) Error() string {
	return fmt.Sprintf("Syntax error near %v; expected %s but found %s", e.pos, e.expected, e.found)
}

// eof is the error returned when the input ends inside a node body that has
// not been closed yet.
type eof struct{}

// Error implements the error interface.
func (e eof) Error() string {
	return "Hit end-of-file while parsing. You’re probably missing a closing brace (‘}’) somewhere"
}

// parser/parser.go (package parser)

// nodeType identifies the kind of an AstNode.
type nodeType uint

const (
	// Normal is a named node with optional attributes and children.
	Normal nodeType = iota
	// Tagless is a nameless container produced for a text section; its
	// children are Text nodes and the nodes embedded with ‘@’.
	Tagless
	// Text is a run of literal text.
	Text
)

// Attr is a single attribute of a node. Value is empty when the attribute
// was written without one.
type Attr struct {
	Key   string
	Value string
}

// AstNode is a node of the parsed document. Text holds the node name for
// Normal nodes and the literal text for Text nodes.
type AstNode struct {
	Type     nodeType
	Text     string
	Attrs    []Attr
	Children []AstNode
}

// ParseFile parses the single top-level node found in file.
func ParseFile(file *os.File) (AstNode, error) {
	r := reader{r: bufio.NewReader(file)}
	return r.parseNode()
}

// parseNode parses one node: either a text section introduced by ‘-’ or a
// named node of the form ‘name attrs { children }’.
func (reader *reader) parseNode() (AstNode, error) {
	if err := reader.skipSpaces(); err != nil {
		return AstNode{}, err
	}

	if r, err := reader.peekRune(); err != nil {
		return AstNode{}, err
	} else if r == '-' {
		return reader.parseText()
	}

	node := AstNode{Type: Normal}

	name, err := reader.parseNodeName()
	if err != nil {
		return AstNode{}, err
	}
	node.Text = name

	attrs, err := reader.parseAttrs()
	if err != nil {
		return AstNode{}, err
	}
	node.Attrs = attrs

	// The above call to reader.parseAttrs() guarantees that we have the ‘{’
	// token.
	if _, err := reader.readRune(); err != nil {
		return AstNode{}, err
	}

	for {
		if err := reader.skipSpaces(); err != nil {
			return AstNode{}, err
		}

		r, err := reader.peekRune()
		if err == io.EOF {
			// Running out of input here means the body was never closed.
			return AstNode{}, eof{}
		}
		if err != nil {
			return AstNode{}, err
		}
		if r == '}' {
			break
		}

		child, err := reader.parseNode()
		if err != nil {
			return AstNode{}, err
		}
		node.Children = append(node.Children, child)
	}

	// The above loop guarantees that we have the ‘}’ token.
	if _, err := reader.readRune(); err != nil {
		return AstNode{}, err
	}

	return node, nil
}

// parseNodeName skips leading whitespace and reads a name: one rune accepted
// by validNameStartChar followed by any number of runes accepted by
// validNameChar. The rune that terminates the name is left unread.
func (reader *reader) parseNodeName() (string, error) {
	var r rune
	var err error

	if err = reader.skipSpaces(); err != nil {
		return "", err
	}

	sb := strings.Builder{}

	if r, err = reader.readRune(); err != nil {
		return "", err
	} else if !validNameStartChar(r) {
		return "", invalidSyntax{
			pos:      reader.pos,
			expected: "node name",
			found:    fmt.Sprintf("invalid character ‘%c’", r),
		}
	}

	for validNameChar(r) {
		sb.WriteRune(r)
		if r, err = reader.readRune(); err != nil {
			return "", err
		}
	}

	if err = reader.unreadRune(); err != nil {
		return "", err
	}
	return sb.String(), nil
}

// parseText parses a text section. The caller has already peeked the
// introducing ‘-’. Text runs up to the next unescaped ‘}’, which is left
// unread. An unescaped ‘@’ embeds a node; ‘\\’, ‘\@’ and ‘\}’ are the only
// valid escape sequences.
func (reader *reader) parseText() (AstNode, error) {
	// Consume the ‘-’.
	if _, err := reader.readRune(); err != nil {
		return AstNode{}, err
	}

	sb := strings.Builder{}
	node := AstNode{Type: Tagless}

loop:
	for {
		r, err := reader.readRune()
		if err != nil {
			return AstNode{}, err
		}
		switch r {
		case '}':
			if err := reader.unreadRune(); err != nil {
				return AstNode{}, err
			}
			break loop
		case '@':
			// Flush the text gathered so far, then parse the embedded node.
			node.Children = append(node.Children, AstNode{
				Type: Text,
				Text: sb.String(),
			})
			sb = strings.Builder{}

			n, err := reader.parseNode()
			if err != nil {
				return AstNode{}, err
			}
			node.Children = append(node.Children, n)
		case '\\':
			r, err = reader.readRune()
			if err != nil {
				return AstNode{}, err
			}
			if r != '\\' && r != '@' && r != '}' {
				return AstNode{}, invalidSyntax{
					pos:      reader.pos,
					expected: "valid escape sequence (‘\\\\’, ‘\\@’, or ‘\\}’)",
					found:    fmt.Sprintf("‘\\%c’", r),
				}
			}
			// The escaped rune is written like any ordinary one.
			fallthrough
		default:
			sb.WriteRune(r)
		}
	}

	node.Children = append(node.Children, AstNode{
		Type: Text,
		Text: sb.String(),
	})
	return node, nil
}

// parseAttrs parses the attributes between a node name and its opening
// ‘{’, which is left unread. ‘.name’ yields a "class" attribute, ‘#name’ an
// "id" attribute, and ‘key’ or ‘key="value"’ a regular attribute.
func (reader *reader) parseAttrs() ([]Attr, error) {
	attrs := make([]Attr, 0, 2)

loop:
	for {
		if err := reader.skipSpaces(); err != nil {
			return nil, err
		}
		r, err := reader.peekRune()
		if err != nil {
			return nil, err
		}

		attr := Attr{}
		switch r {
		case '{':
			break loop
		case '.', '#':
			sym := r

			// Skip ‘sym’
			if _, err := reader.readRune(); err != nil {
				return nil, err
			}

			s, err := reader.parseNodeName()
			if err != nil {
				return nil, err
			}
			attr.Value = s
			if sym == '.' {
				attr.Key = "class"
			} else {
				attr.Key = "id"
			}
		default:
			if unicode.IsSpace(r) {
				if err := reader.skipSpaces(); err != nil {
					return nil, err
				}
				continue
			}

			s, err := reader.parseNodeName()
			if err != nil {
				return nil, err
			}
			attr.Key = s

			if r, err := reader.readNonSpaceRune(); err != nil {
				return nil, err
			} else if r != '=' {
				// No value: put the rune back for the next iteration and
				// record the attribute with an empty value.
				if err := reader.unreadRune(); err != nil {
					return nil, err
				}
				break
			}

			v, err := reader.parseString()
			if err != nil {
				return nil, err
			}
			attr.Value = v
		}
		attrs = append(attrs, attr)
	}

	return attrs, nil
}

// parseString skips whitespace and parses a double-quoted string, returning
// its contents with the escape sequences ‘\\’ and ‘\"’ resolved.
func (reader *reader) parseString() (string, error) {
	sb := strings.Builder{}

	if r, err := reader.readNonSpaceRune(); err != nil {
		return "", err
	} else if r != '"' {
		return "", invalidSyntax{
			pos:      reader.pos,
			expected: "double-quoted string",
			found:    fmt.Sprintf("‘%c’", r),
		}
	}

	for {
		r, err := reader.readRune()
		if err != nil {
			return "", err
		}

		switch r {
		case '"':
			return sb.String(), nil
		case '\\':
			r, err := reader.readRune()
			if err != nil {
				return "", err
			}

			if r != '\\' && r != '"' {
				return "", invalidSyntax{
					pos:      reader.pos,
					expected: "valid escape sequence (‘\\\\’ or ‘\\\"’)",
					found:    fmt.Sprintf("‘\\%c’", r),
				}
			}

			sb.WriteRune(r)
		default:
			sb.WriteRune(r)
		}
	}
}

// validNameStartChar reports whether r may begin a node or attribute name.
func validNameStartChar(r rune) bool {
	return r == ':' || r == '_' ||
		(r >= 'A' && r <= 'Z') ||
		(r >= 'a' && r <= 'z') ||
		(r >= 0x000C0 && r <= 0x000D6) ||
		(r >= 0x000D8 && r <= 0x000F6) ||
		(r >= 0x000F8 && r <= 0x002FF) ||
		(r >= 0x00370 && r <= 0x0037D) ||
		(r >= 0x0037F && r <= 0x01FFF) ||
		(r >= 0x0200C && r <= 0x0200D) ||
		(r >= 0x02070 && r <= 0x0218F) ||
		(r >= 0x02C00 && r <= 0x02FEF) ||
		(r >= 0x03001 && r <= 0x0D7FF) ||
		(r >= 0x0F900 && r <= 0x0FDCF) ||
		(r >= 0x0FDF0 && r <= 0x0FFFD) ||
		(r >= 0x10000 && r <= 0xEFFFF)
}

// validNameChar reports whether r may appear after the first rune of a node
// or attribute name.
func validNameChar(r rune) bool {
	return validNameStartChar(r) ||
		r == '-' || r == '.' || r == '·' ||
		(r >= '0' && r <= '9') ||
		(r >= 0x0300 && r <= 0x036F) ||
		(r >= 0x203F && r <= 0x2040)
}

// parser/reader.go (package parser)

// position tracks where the reader is in the input. prevCol remembers the
// column reached on the previous line so that a newline can be unread.
type position struct {
	col     uint
	row     uint
	prevCol uint
}

// String formats the position as "row:col" with the row counted from one.
func (p position) String() string {
	return fmt.Sprintf("%d:%d", p.row+1, p.col)
}

// reader wraps a bufio.Reader and keeps track of the current position.
type reader struct {
	r   *bufio.Reader
	pos position
}

// peekRune returns the next rune without consuming it, or io.EOF when no
// input is left.
func (reader *reader) peekRune() (rune, error) {
	// A rune occupies 1–4 bytes. Peek hands back whatever is available
	// together with io.EOF when fewer than four bytes remain, so a short
	// read at the end of the input is still enough to decode the last rune.
	b, err := reader.r.Peek(utf8.UTFMax)
	if err != nil && err != io.EOF {
		return 0, err
	}
	if len(b) == 0 {
		return 0, io.EOF
	}

	r, _ := utf8.DecodeRune(b)
	return r, nil
}

// unreadRune puts the most recently read rune back and rewinds the position
// accordingly. The position is left untouched when the underlying reader
// refuses the unread.
func (reader *reader) unreadRune() error {
	if err := reader.r.UnreadRune(); err != nil {
		return err
	}

	if reader.pos.col == 0 {
		// readRune only leaves the column at zero after a newline, so the
		// rune being put back is that newline.
		reader.pos.col = reader.pos.prevCol
		reader.pos.row--
	} else {
		reader.pos.col--
	}
	return nil
}

// readRune consumes and returns the next rune, advancing the position. The
// position does not move when the read fails.
func (reader *reader) readRune() (rune, error) {
	r, _, err := reader.r.ReadRune()
	if err != nil {
		return r, err
	}

	if r == '\n' {
		reader.pos.prevCol = reader.pos.col
		reader.pos.col = 0
		reader.pos.row++
	} else {
		reader.pos.col++
	}
	return r, nil
}

// readNonSpaceRune skips whitespace and then consumes and returns the next
// rune.
func (reader *reader) readNonSpaceRune() (rune, error) {
	if err := reader.skipSpaces(); err != nil {
		return 0, err
	}
	return reader.readRune()
}

// skipSpaces consumes whitespace up to the next non-space rune, which is
// left unread. Reaching the end of the input is not an error.
func (reader *reader) skipSpaces() error {
	for {
		r, err := reader.readRune()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if !unicode.IsSpace(r) {
			return reader.unreadRune()
		}
	}
}