From 4f9a44193b25127dd036ce5f2bf53feb31b53d03 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 5 Mar 2024 16:37:18 +0100 Subject: Add the GSP/TS article --- src/blog/gsp/example.gsp | 11 ++ src/blog/gsp/fmt-code-example.gsp | 4 + src/blog/gsp/fmt-code.m4 | 13 ++ src/blog/gsp/folds.scm | 1 + src/blog/gsp/grammar.js | 9 ++ src/blog/gsp/highlights.scm | 7 + src/blog/gsp/index.gsp | 269 ++++++++++++++++++++++++++++++++++++++ src/blog/gsp/markdown.md | 3 + src/blog/gsp/pug.pug | 8 ++ src/blog/index.gsp | 2 +- 10 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 src/blog/gsp/example.gsp create mode 100644 src/blog/gsp/fmt-code-example.gsp create mode 100644 src/blog/gsp/fmt-code.m4 create mode 100644 src/blog/gsp/folds.scm create mode 100644 src/blog/gsp/grammar.js create mode 100644 src/blog/gsp/highlights.scm create mode 100644 src/blog/gsp/index.gsp create mode 100644 src/blog/gsp/markdown.md create mode 100644 src/blog/gsp/pug.pug (limited to 'src/blog') diff --git a/src/blog/gsp/example.gsp b/src/blog/gsp/example.gsp new file mode 100644 index 0000000..b927d8f --- /dev/null +++ b/src/blog/gsp/example.gsp @@ -0,0 +1,11 @@ +html lang="en" { + body { + p {- Hello, World!} + + ul { + li {a href="#" #home {-Home Page}} + li {a href="#" #about {-About Me}} + li {a href="#" #links {-Fun Links}} + } + } +} diff --git a/src/blog/gsp/fmt-code-example.gsp b/src/blog/gsp/fmt-code-example.gsp new file mode 100644 index 0000000..a56e415 --- /dev/null +++ b/src/blog/gsp/fmt-code-example.gsp @@ -0,0 +1,4 @@ +p {= + Here is my code example: +} +figure { ‘‘FMT_CODE(example.c)’’ } diff --git a/src/blog/gsp/fmt-code.m4 b/src/blog/gsp/fmt-code.m4 new file mode 100644 index 0000000..b59abb2 --- /dev/null +++ b/src/blog/gsp/fmt-code.m4 @@ -0,0 +1,13 @@ +‘‘m4_dnl Set the quote characters to something that conflicts less +m4_changequote(‘‘,’’) + +m4_define(FMT_CODE, ‘‘pre .code-sample {= + m4_esyscmd(sed ' + s/[@\\}]/\\&/g + m4_regexp($1, .*\.diff$, + 
/^+/s/.*/@ins{-&}/ + /^-/s/.*/@del{-&}/ + ) + s/.*/@code{-&}/ + ' $(dirname 'm4___file__')/$1) +}’’)’’ diff --git a/src/blog/gsp/folds.scm b/src/blog/gsp/folds.scm new file mode 100644 index 0000000..b905c94 --- /dev/null +++ b/src/blog/gsp/folds.scm @@ -0,0 +1 @@ +[(node) (attribute_list)] @fold diff --git a/src/blog/gsp/grammar.js b/src/blog/gsp/grammar.js new file mode 100644 index 0000000..13393a1 --- /dev/null +++ b/src/blog/gsp/grammar.js @@ -0,0 +1,9 @@ +{ + node: $ => seq( + $.node_name, + optional($.attribute_list), + '{', optional($.node_body), '}', + ), + + node_name: $ => /[a-zA-Z:_][a-zA-Z0-9:_\-​.]*​/, +} diff --git a/src/blog/gsp/highlights.scm b/src/blog/gsp/highlights.scm new file mode 100644 index 0000000..a8d08a9 --- /dev/null +++ b/src/blog/gsp/highlights.scm @@ -0,0 +1,7 @@ +[">" "-" "=" "@"] @operator +["{" "}"] @tag.delimiter +(node_name) @tag +(attribute_value) @string +[(attribute_name) + (class_shorthand) + (id_shorthand)] @tag.attribute diff --git a/src/blog/gsp/index.gsp b/src/blog/gsp/index.gsp new file mode 100644 index 0000000..5fae118 --- /dev/null +++ b/src/blog/gsp/index.gsp @@ -0,0 +1,269 @@ +html lang="en" { + head { HEAD } + body { + header { + div .head { + h1 {-Never Settle For Trash} + INCLUDE(nav.gsp) + } + + figure .quote { + blockquote { + p {= + 🚨🚨 BREAKING: TYPESCRIPT SNATCHES DEFEAT FROM THE JAWS OF VICTORY + 🚨🚨 + } + } + figcaption {-Lane Wagner} + } + } + + main { + h2 #simple {-Simplicity and Abstraction} + p {= + I like my software simple and devoid of useless abstraction. I often + find myself in positions where I’m searching for scissors to cut a sheet + of paper, and am instead greeted with a chainsaw. The urge to + over-complicate and -abstract your software can be strong; I often see + people who preach simple software writing programs to solve basic + problems that have 30 different command-line flags, and require a 50 + page PDF explaining its operation. + } + + p {= + Why do I mention all of this? 
Well as anyone who’s ever tried their + hand at web-development knows, websites are written in HTML. I wish I + could say that’s a good thing, but as anyone who’s ever looked at HTML + before would know, that language is — to put it lightly — really not + great. It’s extremely verbose, and awkward to write and edit (angle + brackets are not the easiest-to-reach keys on the keyboard). + } + + p {= + So what’s the solution? The most obvious to me is to create a nicer to + read- and write language which I can easily transpile down to HTML. + Ideally the CLI is very simple and works on the standard input and + -output like all good UNIX utilities. I should be able to transpile my + site by simply running ‘@code{-cmd in.xyz out.html}’, where my input + reflects the structure of my output with nicer, less-polluting syntax. + } + + p {= + The kind of tool I am describing here is what I imagine the ideal + solution to be. A @em{-simple} tool with a @em{-simple} function. It + takes an input language and produces an output language. There is also + minimal abstraction. The input language should reflect the structure of + HTML, because that’s exactly what we’re trying to output. It makes + little sense to create a fundamentally different language when HTML not + only does a good job at defining a website’s structure, but sticking + close to the language we are targeting just makes everyone’s life easier + in every way. + } + + h2 #sucks {-Most Software Sucks} + p {= + So with my ideal solution being a simple language with a simple CLI that + sticks close to the structure of HTML, let’s take a look at what other + people have come up with: + } + + figure { FMT_CODE(markdown.md) } + + p {= + Oh no. + } + + p {= + Now most readers probably had the initial reaction of ‘@em{-What’s wrong + with Markdown?}’. To answer your question: @em{-everything}. 
The issue + I have with these highly-prevalent Markdown-based replacements for HTML + is that they ignore the fundamental fact that HTML and Markdown are + @em{-not} compatible languages with each other. HTML is designed around + making websites (with the added autism of XML). It gives us things like + semantic tags for describing input forms, navigation bars, figures, and + more. With the addition of classes and IDs, we can even style two + paragraphs on the same page in different ways. This is fundamentally + not possible in Markdown. If we ignore the fact that Markdown is just + poorly designed, it offers us basically none of what we need to make an + even slightly-complex static page as it’s not meant for website-design + but simply to be a readable plain-text format you can use for + documentation or your email or something. + } + + p {= + How do you make your navigation bar in Markdown? Or style two + paragraphs differently? You can’t. Some try to get around this by + adding extensions to the Markdown language, but they never manage to + cover all bases. Another problem I @em{-always} come across when trying + to use Markdown-to-HTML tools is code blocks. I always make sure to use + tabs for indentation in my code blocks instead of spaces, so that I can + vary the tab-width based on the screen size of the reader. You + obviously can’t do this with spaces since the fundamental (and stupid) + purpose of space-indentation is to force everyone to view code with the + same indentation, which sucks for users on mobile when you have nice + large indents. To this day I have yet to find a Markdown-to-HTML + converter that will let me have tab indents without error-prone + post-processing of the generated HTML. 
+ } + + p {= + Ok well… there are other ways of generating HTML; one rather popular + option is Pug: + } + + figure { FMT_CODE(pug.pug) } + + p {= + While Pug certainly hits the ‘maintain the same structure’ point right + on the head, it fails in one very crucial area — it’s a JavaScript + library @em{-only}, and so requires a whole JS setup simply to transpile + your site to HTML. What a bummer. There is also a second issue which + is that it uses an indentation-sensitive syntax. Normally I am actually + a fan of languages like this — such as Python — but in the case of a + markup language like Pug, this is terrible as it makes macros and + templating with tools such as @code{-m4} exceptionally difficult. Pug + @em{-does} offer templating facilities via JavaScript, but I really try + to minimize the amount of JavaScript I need to write whenever possible. + } + + h2 #solution {-My Solution} + + p {= + So with no existing tools fitting my entry criteria, I did the only + reasonable next thing and made my own tool — I am a programmer after all. + It tries to stick to the format of HTML as closely as possible while + offering an @em{-extremely} easy-to-use transpiler. It also has no + added bullshit like filters, templates, etc. If you want macros, use a + macro-processor like @code{-m4}. I called it GSP because everyone knows + that German Shorthaired Pointers are better than pugs. Here is a quick + syntax example: + } + + figure { FMT_CODE(example.gsp) } + + p {= + Here you can see almost all of GSP. The document follows the same + structure as HTML, but thanks to the use of braces instead of opening- + and closing tags, the syntax is far less verbose and easier to read. + The language also provides shorthands for classes and IDs through + CSS-selector syntax. + } + + p {= + Templating and macros are also very easy via macro processors thanks to + the use of braces instead of whitespace-based scoping. 
As an example, I + like to have code samples in articles like this one — but I like to have + the code in an external file. To achieve this I use the following m4 + macro to insert the named file verbatim into my document with delimiters + escaped. As a bonus it also syntax-highlights diffs: + } + + figure { FMT_CODE(fmt-code.m4) } + + p {= + It may look a bit confusing, but at the end of the day it’s just a + glorified wrapper around Sed. I use this macro as such in my GSP + documents that comprise this site: + } + + figure { FMT_CODE(fmt-code-example.gsp) } + + p {= + The transpiler itself is also incredibly easy to use, something + JavaScript developers would never be able to comprehend. In order to + transpile a GSP document into an HTML document, I simply run + ‘@code{-gsp index.gsp >index.html}’. Yep, that’s it. If I want to + expand macros too, the command still remains simple with it just being + ‘@code{-m4 index.gsp | gsp >index.html}’. + } + + h2 #syntax {-Syntax Highlighting} + + p {= + One problem that I came across writing GSP was the lack of syntax + highlighting. It can seem not so important, but syntax highlighting is + crucial for helping you quickly identify different syntax elements. The + awesome solution I found for this ended up being Tree-Sitter. Tree-Sitter + is a parser-generator that various text editors such as Vim and Emacs + can integrate with to offer efficient- and high-quality syntax + highlighting, amongst other features such as syntax-aware code folding + and movement. + } + + p {= + After a bit of research and reading the documentation, I found that + creating your own parsers is actually really easy. You effectively just + define a JavaScript object that describes the language grammar, and a C + parser is generated from that. If you’re interested, you can find the + GSP parser @NEWTAB href="https://git.sr.ht/~mango/tree-sitter-gsp" + {-here}. 
To give you a bit of an idea of just how simple a Tree-Sitter + parser is, here’s a simplified example of how you describe the + definition of a node, and a node name: + } + + figure { FMT_CODE(grammar.js) } + + p {= + As you can see, the grammar syntax is extremely simple. You simply + define your core syntax elements via regular expressions, and then + compose them together via helper functions such as @code{-optional} and + @code{-repeat} to define the full structure of your language. + } + + p {= + This isn’t enough though. We now have a parser for our language that + can create a syntax tree that our editor can take advantage of, but our + editor still doesn’t know what each node actually @em{-is} so that it + can be syntax highlighted properly. Tree-Sitter solves this through a + query file written in Scheme where we can describe how to syntax + highlight our AST. This is what the configuration for GSP looks like: + } + + figure { + figcaption { + code {-queries/highlights.scm} + } + FMT_CODE(highlights.scm) + } + + p {= + As you can see, this is all really simple stuff, which is what I love so + much about Tree-Sitter — it’s just so easy! With these basic + annotations your editor knows that attribute values should be + highlighted like strings, braces like tag delimiters, etc. In a similar + vein, writing a query to describe code-folding is really easy: + } + + figure { + figcaption { + code {-queries/folds.scm} + } + FMT_CODE(folds.scm) + } + + h2 #takeaway {-The Takeaway} + + p {= + So what’s the takeaway? I think it’s that when you have a problem, + oftentimes the best solution is not to fundamentally redesign something + from the ground up, or to completely change the way a system works, but + to instead identify the specific thing that annoys you and find a fix + for it. I thought that the syntax of HTML was annoying and bad, so I + found a solution for the syntax, while keeping the core structure the + same. 
In the same line of thinking, try not to over-abstract — I’m + looking at you, Java developers. Abstraction often leads to + exponentially increased complications the moment we want to do anything + different or out of the ordinary, so unless you can find a really nice + abstraction that doesn’t really make anyone’s life harder, try to avoid + them when you can. + } + + p {= + If you’re interested in GSP, you can find the git repository over at + @NEWTAB href="https://git.sr.ht/~mango/gsp" {-Sourcehut}. + } + } + + footer { FOOT } + } +} diff --git a/src/blog/gsp/markdown.md b/src/blog/gsp/markdown.md new file mode 100644 index 0000000..aa4f169 --- /dev/null +++ b/src/blog/gsp/markdown.md @@ -0,0 +1,3 @@ +# Markdown 4 Lyfe + +Welcome to my website written in Hugo! diff --git a/src/blog/gsp/pug.pug b/src/blog/gsp/pug.pug new file mode 100644 index 0000000..7090786 --- /dev/null +++ b/src/blog/gsp/pug.pug @@ -0,0 +1,8 @@ +div + p + | Hello world! This is a + | multiline paragraph. + ul + li foo + li bar + li baz diff --git a/src/blog/index.gsp b/src/blog/index.gsp index 4d4aab8..7e53697 100644 --- a/src/blog/index.gsp +++ b/src/blog/index.gsp @@ -32,9 +32,9 @@ html lang="en" { m4_dnl ARTICLE(andy-val, {-Values in Andy}) m4_dnl ARTICLE(new-sh, {-Making a New Shell}) m4_dnl ARTICLE(extend, {-Extensible Scripting}) - m4_dnl ARTICLE(gsp, {-Writing an HTML Preprocessor (feat. Tree-Sitter)}) m4_dnl ARTICLE(fw-ec, {-Patching My Laptop’s Embedded Controller}) + ARTICLE(gsp, {-Writing an HTML Preprocessor (feat. Tree-Sitter)}) ARTICLE(nvim-ts, {-Hacking with Tree-Sitter on Neovim}) } } -- cgit v1.2.3