aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2023-09-02 18:49:53 +0200
committerThomas Voss <mail@thomasvoss.com> 2023-09-08 23:16:19 +0200
commit643623dbecdc1ccb6f3ac77e4ebabdc6ca1d8d06 (patch)
treea9d6b50ad7263e792bc276f765ada74a5661a8b1
Genesis commit
-rw-r--r--.gitignore2
-rw-r--r--LICENSE14
-rw-r--r--formatter/formatter.go104
-rw-r--r--go.mod3
-rw-r--r--main.go37
-rw-r--r--parser/errors.go19
-rw-r--r--parser/parser.go321
-rw-r--r--parser/reader.go94
8 files changed, 594 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d11f0ca
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+gsp
+test-data
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..276994d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,14 @@
+BSD Zero Clause License
+
+Copyright © 2023 Thomas Voss
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
diff --git a/formatter/formatter.go b/formatter/formatter.go
new file mode 100644
index 0000000..96e19d1
--- /dev/null
+++ b/formatter/formatter.go
@@ -0,0 +1,104 @@
+package formatter
+
+import (
+ "fmt"
+ "unicode"
+
+ "git.thomasvoss.com/gsp/parser"
+)
+
+// stringEscapes maps the runes that PrintHtml must not emit verbatim inside
+// a double-quoted attribute value to the HTML entities that replace them.
+var stringEscapes = map[rune]string{
+ '"': "&quot;",
+ '&': "&amp;",
+ '<': "&lt;",
+}
+
+// PrintHtml writes the HTML representation of ast to standard output.
+//
+// Text nodes are printed verbatim.  Normal nodes are printed as an opening
+// tag with their attributes, followed by their children and a closing tag;
+// a normal node without any children is printed as an opening tag only.
+// Tagless nodes print nothing but their children.  Leading whitespace of a
+// first text child and trailing whitespace of a last text child is removed.
+func PrintHtml(ast parser.AstNode) {
+	if ast.Type == parser.Text {
+		fmt.Print(ast.Text)
+		return
+	}
+
+	if ast.Type == parser.Normal {
+		fmt.Printf("<%s", ast.Text)
+
+		// Classes are grouped together with ‘class="…"’, so we need
+		// special handling.
+		var classes []string
+		var notClasses []parser.Attr
+
+		for _, a := range ast.Attrs {
+			if a.Key == "class" {
+				classes = append(classes, a.Value)
+			} else {
+				notClasses = append(notClasses, a)
+			}
+		}
+
+		if len(classes) > 0 {
+			fmt.Print(" class=\"")
+			for i, c := range classes {
+				if i > 0 {
+					fmt.Print(" ")
+				}
+				// A class given as ‘class="…"’ is an arbitrary string,
+				// so it needs the same escaping as any other value.
+				printEscaped(c)
+			}
+			fmt.Print("\"")
+		}
+
+		for _, a := range notClasses {
+			fmt.Printf(" %s", a.Key)
+			// A valueless attribute is printed as a bare key.  Move on
+			// to the next attribute instead of leaving the loop, so
+			// that the attributes following it are still printed.
+			if a.Value == "" {
+				continue
+			}
+			fmt.Print("=\"")
+			printEscaped(a.Value)
+			fmt.Print("\"")
+		}
+
+		fmt.Print(">")
+	}
+
+	// A node without children gets no closing tag.
+	if len(ast.Children) == 0 {
+		return
+	}
+
+	for i, n := range ast.Children {
+		// n is a copy of the child, so trimming it does not modify ast.
+		if n.Type == parser.Text {
+			if i == 0 {
+				n.Text = trimLeftSpaces(n.Text)
+			}
+
+			if i == len(ast.Children)-1 {
+				n.Text = trimRightSpaces(n.Text)
+			}
+		}
+
+		PrintHtml(n)
+	}
+
+	if ast.Type == parser.Normal {
+		fmt.Printf("</%s>", ast.Text)
+	}
+}
+
+// printEscaped writes s to standard output, replacing every rune that has
+// an entry in stringEscapes with its HTML entity.
+func printEscaped(s string) {
+	for _, r := range s {
+		if v, ok := stringEscapes[r]; ok {
+			fmt.Print(v)
+		} else {
+			fmt.Printf("%c", r)
+		}
+	}
+}
+
+// trimLeftSpaces returns s with all leading Unicode whitespace removed.
+func trimLeftSpaces(s string) string {
+	// Range over the string itself: the index is a byte offset that can be
+	// used to slice s directly, and the loop can never run past the end.
+	// (Comparing a rune index against len(s), which counts bytes, indexes
+	// out of range for whitespace-only strings with multi-byte spaces.)
+	for i, r := range s {
+		if !unicode.IsSpace(r) {
+			return s[i:]
+		}
+	}
+	return ""
+}
+
+// trimRightSpaces returns s with all trailing Unicode whitespace removed.
+func trimRightSpaces(s string) string {
+	runes := []rune(s)
+	// end is the number of runes to keep.
+	end := len(runes)
+	for end > 0 && unicode.IsSpace(runes[end-1]) {
+		end--
+	}
+	return string(runes[:end])
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..5a7aafb
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.thomasvoss.com/gsp
+
+go 1.21.0
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..3a39852
--- /dev/null
+++ b/main.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+ "fmt"
+ "os"
+
+ "git.thomasvoss.com/gsp/formatter"
+ "git.thomasvoss.com/gsp/parser"
+)
+
+// main parses the file named by the sole command-line argument and prints
+// the resulting tree as HTML on standard output, followed by a newline.  A
+// wrong argument count prints a usage line; any other failure is reported
+// through die.  Both exit with status 1.
+func main() {
+	if argc := len(os.Args); argc != 2 {
+		fmt.Fprintf(os.Stderr, "Usage: %s file\n", os.Args[0])
+		os.Exit(1)
+	}
+
+	file, openErr := os.Open(os.Args[1])
+	if openErr != nil {
+		die(openErr)
+	}
+	defer file.Close()
+
+	ast, parseErr := parser.ParseFile(file)
+	if parseErr != nil {
+		die(parseErr)
+	}
+
+	formatter.PrintHtml(ast)
+	fmt.Println()
+}
+
+// die prints the program name followed by each argument, every one of them
+// introduced by ‘: ’, as a single line on standard error and then exits
+// with status 1.
+func die(strings ...any) {
+	fmt.Fprint(os.Stderr, os.Args[0])
+	for i := range strings {
+		fmt.Fprintf(os.Stderr, ": %v", strings[i])
+	}
+	fmt.Fprintln(os.Stderr)
+	os.Exit(1)
+}
diff --git a/parser/errors.go b/parser/errors.go
new file mode 100644
index 0000000..f6369be
--- /dev/null
+++ b/parser/errors.go
@@ -0,0 +1,19 @@
+package parser
+
+import "fmt"
+
+// invalidSyntax is the error returned by the parser when it reads a rune
+// that is not allowed at the current point of the input.
+type invalidSyntax struct {
+ // pos is the reader position at the time the error was created.
+ pos position
+ // expected describes what the parser wanted to read.
+ expected string
+ // found describes what the parser read instead.
+ found string
+}
+
+// Error implements the error interface.  The position is formatted with
+// %v, so it is rendered by position's String method.
+func (e invalidSyntax) Error() string {
+ return fmt.Sprintf("Syntax error near %v; expected %s but found %s", e.pos, e.expected, e.found)
+}
+
+// eof is the error returned when the input ends while the parser is still
+// looking for the end of a node body.
+type eof struct{}
+
+// Error implements the error interface.
+func (e eof) Error() string {
+ return "Hit end-of-file while parsing. You’re probably missing a closing brace (‘}’) somewhere"
+}
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..5b4d65c
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,321 @@
+package parser
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+ "unicode"
+)
+
+// nodeType identifies the kind of an AstNode.
+type nodeType uint
+
+const (
+ // Normal is a named node; parseNode stores the node name in
+ // AstNode.Text.
+ Normal nodeType = iota
+ // Tagless is the nameless node produced by parseText; it only groups
+ // its children.
+ Tagless
+ // Text is a run of literal text stored in AstNode.Text.
+ Text
+)
+
+// Attr is a single attribute of a node.  The ‘.name’ and ‘#name’
+// shorthands are stored with the keys "class" and "id" respectively, and
+// an attribute written without a value has an empty Value.
+type Attr struct {
+ Key string
+ Value string
+}
+
+// AstNode is a node of the parsed document tree.
+type AstNode struct {
+ // Type is the kind of the node.
+ Type nodeType
+ // Text is the node name for Normal nodes and the literal text for
+ // Text nodes.
+ Text string
+ // Attrs holds the attributes parsed for a Normal node, in source
+ // order.
+ Attrs []Attr
+ // Children holds the nested nodes in source order.
+ Children []AstNode
+}
+
+// ParseFile reads from file through a buffered reader and returns the
+// first node it contains, together with all of that node’s descendants.
+func ParseFile(file *os.File) (AstNode, error) {
+	src := &reader{r: bufio.NewReader(file)}
+	return src.parseNode()
+}
+
+// parseNode parses the next node of the input.  If the first non-space
+// rune is ‘-’ the node is handed over to parseText; otherwise a node name,
+// its attributes and a brace-delimited list of child nodes are parsed.
+// Reaching the end of the input inside the child list yields an eof error.
+func (reader *reader) parseNode() (AstNode, error) {
+	if err := reader.skipSpaces(); err != nil {
+		return AstNode{}, err
+	}
+
+	first, err := reader.peekRune()
+	if err != nil {
+		return AstNode{}, err
+	}
+	if first == '-' {
+		return reader.parseText()
+	}
+
+	name, err := reader.parseNodeName()
+	if err != nil {
+		return AstNode{}, err
+	}
+
+	attrs, err := reader.parseAttrs()
+	if err != nil {
+		return AstNode{}, err
+	}
+
+	node := AstNode{Type: Normal, Text: name, Attrs: attrs}
+
+	// parseAttrs only returns successfully when the next rune is ‘{’, so
+	// this read consumes the opening brace.
+	if _, err := reader.readRune(); err != nil {
+		return AstNode{}, err
+	}
+
+	for {
+		if err := reader.skipSpaces(); err != nil {
+			return AstNode{}, err
+		}
+
+		next, err := reader.peekRune()
+		if err == io.EOF {
+			return AstNode{}, eof{}
+		}
+		if err != nil {
+			return AstNode{}, err
+		}
+		if next == '}' {
+			break
+		}
+
+		child, err := reader.parseNode()
+		if err != nil {
+			return AstNode{}, err
+		}
+		node.Children = append(node.Children, child)
+	}
+
+	// The loop is only left with ‘}’ as the next rune; consume it.
+	if _, err := reader.readRune(); err != nil {
+		return AstNode{}, err
+	}
+
+	return node, nil
+}
+
+// parseNodeName skips leading whitespace and reads a name: one rune
+// accepted by validNameStartChar followed by any number of runes accepted
+// by validNameChar.  The rune that terminates the name is pushed back.
+// An invalidSyntax error is returned if the first rune cannot start a
+// name.
+func (reader *reader) parseNodeName() (string, error) {
+	if err := reader.skipSpaces(); err != nil {
+		return "", err
+	}
+
+	c, err := reader.readRune()
+	if err != nil {
+		return "", err
+	}
+	if !validNameStartChar(c) {
+		return "", invalidSyntax{
+			pos:      reader.pos,
+			expected: "node name",
+			found:    fmt.Sprintf("invalid character ‘%c’", c),
+		}
+	}
+
+	var name strings.Builder
+	for validNameChar(c) {
+		name.WriteRune(c)
+		c, err = reader.readRune()
+		if err != nil {
+			return "", err
+		}
+	}
+
+	// c is the first rune after the name; leave it for the caller.
+	if err := reader.unreadRune(); err != nil {
+		return "", err
+	}
+	return name.String(), nil
+}
+
+// parseText parses a text node and returns it as a Tagless node.  Literal
+// text becomes Text children; every ‘@’ starts an embedded node which is
+// parsed with parseNode and inserted between the surrounding text.  The
+// escape sequences ‘\\’, ‘\@’ and ‘\}’ yield the escaped rune, any other
+// escape is an invalidSyntax error.  Parsing stops in front of the first
+// unescaped ‘}’, which is left in the input.
+func (reader *reader) parseText() (AstNode, error) {
+	// Skip the rune that introduced the text node.
+	if _, err := reader.readRune(); err != nil {
+		return AstNode{}, err
+	}
+
+	var text strings.Builder
+	node := AstNode{Type: Tagless}
+
+	for {
+		r, err := reader.readRune()
+		if err != nil {
+			return AstNode{}, err
+		}
+
+		if r == '}' {
+			// The closing brace belongs to the enclosing node.
+			if err := reader.unreadRune(); err != nil {
+				return AstNode{}, err
+			}
+			break
+		}
+
+		switch r {
+		case '@':
+			// Flush the text gathered so far, even if it is empty,
+			// then parse the embedded node.
+			node.Children = append(node.Children, AstNode{
+				Type: Text,
+				Text: text.String(),
+			})
+			text.Reset()
+
+			child, err := reader.parseNode()
+			if err != nil {
+				return AstNode{}, err
+			}
+			node.Children = append(node.Children, child)
+		case '\\':
+			esc, err := reader.readRune()
+			if err != nil {
+				return AstNode{}, err
+			}
+			if esc != '\\' && esc != '@' && esc != '}' {
+				return AstNode{}, invalidSyntax{
+					pos:      reader.pos,
+					expected: "valid escape sequence (‘\\\\’, ‘\\@’, or ‘\\}’)",
+					found:    fmt.Sprintf("‘\\%c’", esc),
+				}
+			}
+			text.WriteRune(esc)
+		default:
+			text.WriteRune(r)
+		}
+	}
+
+	node.Children = append(node.Children, AstNode{
+		Type: Text,
+		Text: text.String(),
+	})
+	return node, nil
+}
+
+// parseAttrs parses the attribute list of a node and returns it in source
+// order.  Three forms are recognised: ‘.name’ (stored under the key
+// "class"), ‘#name’ (stored under the key "id"), and ‘key’ optionally
+// followed by ‘="value"’.  Parsing stops in front of the ‘{’ that opens
+// the node body; that rune is left in the input.  Any read error,
+// including reaching the end of the input, is returned as is.
+func (reader *reader) parseAttrs() ([]Attr, error) {
+	attrs := make([]Attr, 0, 2)
+
+loop:
+	for {
+		if err := reader.skipSpaces(); err != nil {
+			return nil, err
+		}
+		r, err := reader.peekRune()
+		if err != nil {
+			return nil, err
+		}
+
+		attr := Attr{}
+		switch r {
+		case '{':
+			break loop
+		case '.', '#':
+			sym := r
+
+			// Skip ‘sym’
+			if _, err := reader.readRune(); err != nil {
+				return nil, err
+			}
+
+			s, err := reader.parseNodeName()
+			if err != nil {
+				return nil, err
+			}
+			attr.Value = s
+			if sym == '.' {
+				attr.Key = "class"
+			} else {
+				attr.Key = "id"
+			}
+		default:
+			if unicode.IsSpace(r) {
+				if err := reader.skipSpaces(); err != nil {
+					return nil, err
+				}
+				continue
+			}
+
+			key, err := reader.parseNodeName()
+			if err != nil {
+				return nil, err
+			}
+			attr.Key = key
+
+			next, err := reader.readNonSpaceRune()
+			if err != nil {
+				return nil, err
+			}
+			if next != '=' {
+				// Attribute without a value.  Put the rune back, and
+				// do not ignore a failure to do so, as the rune would
+				// otherwise be lost.
+				if err := reader.unreadRune(); err != nil {
+					return nil, err
+				}
+				// This leaves the switch only; the attribute is still
+				// appended below.
+				break
+			}
+
+			val, err := reader.parseString()
+			if err != nil {
+				return nil, err
+			}
+			attr.Value = val
+		}
+		attrs = append(attrs, attr)
+	}
+
+	return attrs, nil
+}
+
+// parseString skips leading whitespace and parses a double-quoted string,
+// returning its contents without the quotes.  Inside the string ‘\\’ and
+// ‘\"’ yield the escaped rune; any other escape, or a missing opening
+// quote, is an invalidSyntax error.
+func (reader *reader) parseString() (string, error) {
+	open, err := reader.readNonSpaceRune()
+	if err != nil {
+		return "", err
+	}
+	if open != '"' {
+		return "", invalidSyntax{
+			pos:      reader.pos,
+			expected: "double-quoted string",
+			found:    fmt.Sprintf("‘%c’", open),
+		}
+	}
+
+	var sb strings.Builder
+	for {
+		r, err := reader.readRune()
+		if err != nil {
+			return "", err
+		}
+
+		if r == '"' {
+			return sb.String(), nil
+		}
+
+		if r == '\\' {
+			// Replace r by the escaped rune, which is written below.
+			if r, err = reader.readRune(); err != nil {
+				return "", err
+			}
+			if r != '\\' && r != '"' {
+				return "", invalidSyntax{
+					pos:      reader.pos,
+					expected: "valid escape sequence (‘\\\\’ or ‘\\\"’)",
+					found:    fmt.Sprintf("‘\\%c’", r),
+				}
+			}
+		}
+
+		sb.WriteRune(r)
+	}
+}
+
+// validNameStartChar reports whether r may be the first rune of a name:
+// ‘:’, ‘_’, an ASCII letter, or a rune in one of the ranges listed below.
+func validNameStartChar(r rune) bool {
+	if r == ':' || r == '_' {
+		return true
+	}
+
+	// Inclusive [low, high] ranges of accepted runes.
+	ranges := [...][2]rune{
+		{'A', 'Z'},
+		{'a', 'z'},
+		{0x000C0, 0x000D6},
+		{0x000D8, 0x000F6},
+		{0x000F8, 0x002FF},
+		{0x00370, 0x0037D},
+		{0x0037F, 0x01FFF},
+		{0x0200C, 0x0200D},
+		{0x02070, 0x0218F},
+		{0x02C00, 0x02FEF},
+		{0x03001, 0x0D7FF},
+		{0x0F900, 0x0FDCF},
+		{0x0FDF0, 0x0FFFD},
+		{0x10000, 0xEFFFF},
+	}
+	for _, b := range ranges {
+		if r >= b[0] && r <= b[1] {
+			return true
+		}
+	}
+	return false
+}
+
+// validNameChar reports whether r may appear in a name after its first
+// rune: everything validNameStartChar accepts, plus ‘-’, ‘.’, ‘·’, the
+// ASCII digits and the ranges U+0300–U+036F and U+203F–U+2040.
+func validNameChar(r rune) bool {
+	switch {
+	case validNameStartChar(r):
+		return true
+	case r == '-', r == '.', r == '·':
+		return true
+	case '0' <= r && r <= '9':
+		return true
+	case 0x0300 <= r && r <= 0x036F:
+		return true
+	case 0x203F <= r && r <= 0x2040:
+		return true
+	}
+	return false
+}
diff --git a/parser/reader.go b/parser/reader.go
new file mode 100644
index 0000000..22a8e6f
--- /dev/null
+++ b/parser/reader.go
@@ -0,0 +1,94 @@
+package parser
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "unicode"
+ "unicode/utf8"
+)
+
+// position tracks where in the input the reader currently is.
+type position struct {
+ // col is incremented for every rune read that is not a newline and
+ // reset to zero when a newline is read.
+ col uint
+ // row is incremented for every newline read; it starts at zero.
+ row uint
+ // prevCol is the value col had before the last newline was read, so
+ // that unreading that newline can restore it.
+ prevCol uint
+}
+
+// String formats the position as ‘row:col’ with the row counted from one.
+func (p position) String() string {
+ return fmt.Sprintf("%d:%d", p.row+1, p.col)
+}
+
+// reader wraps a buffered reader and keeps track of the position of the
+// runes read from it.
+type reader struct {
+ // r is the underlying input.
+ r *bufio.Reader
+ // pos is updated by readRune and unreadRune.
+ pos position
+}
+
+// peekRune returns the next rune of the input without consuming it and
+// without changing the position.  It returns io.EOF if no input is left.
+func (reader *reader) peekRune() (rune, error) {
+	// A rune takes 1–4 bytes.  Close to the end of the input fewer than
+	// four bytes may be left; Peek then returns the bytes that are
+	// available together with io.EOF, so one call covers every case.
+	buf, err := reader.r.Peek(utf8.UTFMax)
+	if err != nil && err != io.EOF {
+		return 0, err
+	}
+	if len(buf) == 0 {
+		return 0, io.EOF
+	}
+
+	r, _ := utf8.DecodeRune(buf)
+	return r, nil
+}
+
+// unreadRune pushes the most recently read rune back into the input and
+// moves the position back accordingly.  If the underlying reader cannot
+// unread, the error is returned and the position is left untouched.
+func (reader *reader) unreadRune() error {
+	// Unread first, so that a failure cannot leave the position out of
+	// step with the input (or wrap the unsigned row below zero).
+	if err := reader.r.UnreadRune(); err != nil {
+		return err
+	}
+
+	if reader.pos.col == 0 {
+		// Column zero means the rune being unread was a newline; go
+		// back to the end of the previous line.
+		reader.pos.col = reader.pos.prevCol
+		reader.pos.row--
+	} else {
+		reader.pos.col--
+	}
+
+	return nil
+}
+
+// readRune reads the next rune of the input and advances the position: a
+// newline starts a new row at column zero, remembering the column it was
+// read at, and any other rune moves one column to the right.  The rune and
+// error of the underlying reader are returned unchanged.
+func (reader *reader) readRune() (rune, error) {
+	r, _, err := reader.r.ReadRune()
+
+	p := &reader.pos
+	switch r {
+	case '\n':
+		p.prevCol = p.col
+		p.col = 0
+		p.row++
+	default:
+		p.col++
+	}
+
+	return r, err
+}
+
+// readNonSpaceRune skips any whitespace and then reads and returns the
+// next rune.  On error the returned rune is zero.
+func (reader *reader) readNonSpaceRune() (rune, error) {
+	if err := reader.skipSpaces(); err != nil {
+		return 0, err
+	}
+
+	r, err := reader.readRune()
+	if err != nil {
+		return 0, err
+	}
+	return r, nil
+}
+
+// skipSpaces consumes runes up to, but not including, the next rune that
+// is not Unicode whitespace.  Reaching the end of the input is not an
+// error; any other read error is returned.
+func (reader *reader) skipSpaces() error {
+	for {
+		r, err := reader.readRune()
+		switch {
+		case err == io.EOF:
+			return nil
+		case err != nil:
+			return err
+		case !unicode.IsSpace(r):
+			// Leave the first non-space rune for the caller.
+			return reader.unreadRune()
+		}
+	}
+}