From 4626a2da056a1961ba4781d8472a22860539072f Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 2 Dec 2023 02:16:34 +0100 Subject: Add a post about Andy --- src/blog/index.gsp | 1 + src/blog/new-sh/index.gsp | 237 +++++++++++++++++++++++++++++++++++++ src/blog/new-sh/proc-diff.bash.gsp | 3 + src/blog/new-sh/proc-diff.sh.gsp | 6 + src/blog/new-sh/suf.an.gsp | 9 ++ src/blog/new-sh/suf.bash.gsp | 3 + src/style.css | 5 +- 7 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 src/blog/new-sh/index.gsp create mode 100644 src/blog/new-sh/proc-diff.bash.gsp create mode 100644 src/blog/new-sh/proc-diff.sh.gsp create mode 100644 src/blog/new-sh/suf.an.gsp create mode 100644 src/blog/new-sh/suf.bash.gsp diff --git a/src/blog/index.gsp b/src/blog/index.gsp index f014029..cc61785 100644 --- a/src/blog/index.gsp +++ b/src/blog/index.gsp @@ -28,6 +28,7 @@ html lang="en" { p {-Posts:} ul { + m4_article(new-sh, {-Making a New Shell}) m4_article(extend, {-Extensible Scripting}) m4_article(nvim-ts, {-Hacking with Tree-Sitter on Neovim}) m4_article(gsp, {-Writing an HTML Preprocessor (feat. Tree-Sitter)}) diff --git a/src/blog/new-sh/index.gsp b/src/blog/new-sh/index.gsp new file mode 100644 index 0000000..b3470f5 --- /dev/null +++ b/src/blog/new-sh/index.gsp @@ -0,0 +1,237 @@ +html lang="en" { + head { m4_include(head.gsp) } + body { + header { + div { + h1 {-POSIX Pitfalls} + m4_include(nav.gsp) + } + + figure .quote { + blockquote { + p {= + Plan 9 argues that given a few carefully implemented abstractions it + is possible to produce a small operating system that provides + support for the largest systems on a variety of architectures and + networks. + } + } + figcaption {= + @cite{-The Use of Name Spaces in Plan 9} by Rob Pike et al. + } + } + } + + main { + h2 #prologue {-Prologue} + p {- + Since the moment I decided to take software development more seriously, + I have been absolutely enamored by the Shell @x-ref{-1} — the POSIX + shell to be more specific. 
The syntax is questionable at times, and the + available resources outside of the POSIX specification itself are + absolutely piss-poor as a result of the average *NIX user failing to + understand the difference between @code{-/bin/sh} and Bash @x-ref{-2}. + What @em{-really} drew me into the Shell was the powerful idea of + composability, and being able to combine simple tools to form a much + more powerful one in only a handful of lines. I talked more about this + @a href="/blog/extend" {-in my previous post}. + } + + p {- + It didn’t take long for me to find issues with my beloved + @code{-/bin/sh} however. Like it or not, the modern shells we all use + such as Bash and Zsh are all based on a that is approaching half a + century in age. It some things right — like the idea that you can use + loops and conditional statements in a pipeline — but it also got a lot + of things wrong, and these are things that we can improve on. The most + obvious deficiency in POSIX shells is the absolutely abhorrent handling + of whitespace. + } + + p {- + There have been quite a few alternatives to the POSIX shell made over + the years, although I find this to be an area that is shockingly + underdeveloped. If you’re reading this, I implore you to attempt to + design your own shell, no matter how simple. If you know how to make + one, you can experiment with new ideas! If you don’t, it’s a really + great learning experience, even if all your shell can do is spawn a + process. + } + + aside { + p data-ref="1" {- + My first ever ‘programming’ language that I learnt was actually + Windows Batch Script back on my elementary school laptops. + } + p data-ref="2" {- + If you see someone using Oh-My-Zsh unironically, you can rest assured + they know absolutely nothing about how their shell works. + } + } + + h2 #alternatives {-Alternatives to POSIX} + p {- + There are a few alternative shells that have managed to garner a + respectable userbase. 
Fish, Powershell, Nushell, and Elvish just + to name the ones I can think of off the top of my head have all managed + to get a userbase while giving the finger to POSIX. I do believe that + ditching POSIX is a necessity to create a half-decent modern shell. I + have used Fish for close to a year before and it is probably my favorite + of the bunch; it tries to do its own thing with its own ideas, but it + still remains highly familiar for those coming from POSIX. + } + + p {- + I’m not entirely happy with Fish though. Fish and most of the other + modern shells all fall, in my opinion, into the classic trap of + over-engineering; they try to do too much and lose sight of what the + shell is fundamentally all about. The philosophy of the shell is to + manipulate streams by composing small and simple tools, yet Fish + bundles in a whole host of builtins that add nothing while replacing + functionality that is already solved by existing tools. You can read + from @code{-/dev/urandom} to generate random numbers, yet Fish added a + @code{-random} builtin. You can do arbitrary-precision mathematics with + the Bc and Dc calculators, yet Fish added the @code{-math} builtin. The + same goes for the @code{-string} builtin. + } + + p {- + I do appreciate Fish though, because despite losing sight of what a + shell should be (in my opinion), they still tried something new, and I + respect that. The same goes for all the other shells out there. Also + they definitely do get some things right. Using Fish as an example once + again, they decided to just remove the ‘?’ wildcard from globs entirely + — a move I completely support. + } + + p {- + All in all, while I don’t think any of these ‘mainstream’ alternatives + got it right, they are a great source of inspiration for me as to what I + should or should not do should I make my own shell. 
+ } + + h2 #andy {-Introducing Andy} + p {- + Andy is a shell that I’ve been meaning to make for around 2 years now + which never materialized as a result of a lack of dedicated focus, and a + lack of a thought-out vision and -design. Part of why I’m writing this + in fact is to help me develop a proper vision for what I want Andy to + be; I find that discussing and writing about things helps a lot with + this kind of thing. + } + + p {- + I want the philosophy of Andy to reflect that of the original Bourne + Shell, and the fewer features the better — ‘less is more’ as Ludwig Mies + van der Rohe famously said. That being said, not all features should be + thrown to the wayside; if a feature is simple to understand, simple to + implement, and solves a real problem, there is no problem in adding it. + } + + p {- + Take process substitution for example. To properly compare the outputs + of two processes in POSIX shell, we need to do this whole rigmarole: + } + + figure { + pre {= m4_fmt_code(proc-diff.sh.gsp) } + } + + p {- + Now compare that to the Bash solution using process substitutions: + } + + figure { + pre {= m4_fmt_code(proc-diff.bash.gsp) } + } + + p {- + The Bash solution is more readable, and far easier to understand at a + glance. It’s also a lot better functionally in that it doesn’t require + you to manually clean up your temporary file (something + which might fail if your script receives certain signals). It’s more + efficient too; instead of waiting for @code{-cmd2} to write all its + output to a temporary file for us to read, both @code{-cmd1} and + @code{-cmd2} are run in parallel to each other. This can obviously be + solved using named pipes, but now we’re adding more complexity to our + application. + } + + p {- + There are a few fundamental ‘problems’ I want to fix in Andy. 
The first + is whitespace handling; safe POSIX shell scripts will contain almost as + many quotation marks to avoid word-splitting as Lisp programs contain + parentheses. This is an absolute must; under no circumstance should + strings be expanding into even more strings without the explicit consent + of the user; it’s a recipe for disaster and it’s the shell-equivalent of + the null-pointer-exception. + } + + p {- + The second major fix I want to make is in terms of datatypes. For this + I took major inspiration from Plan 9’s Rc shell. While the fundamental + datatype of the shell is the @em{-stream} — which is well-represented by + the string — we very often are working with @em{-lists} of items. Lists + of filenames, lists of regular expression matches, etc. I want lists to + be a first-class citizen of Andy. + } + + p {- + Outside of these major changes, there are other minor changes I want to + make. I want to use a C-style syntax similar to (but even simpler than) + that of Rc. The whole ‘if-then’ and ‘esac’ business is both overly + verbose for a language that needs to work well in a m4_abbr(REPL), and + just plain ugly. A friend of mine even suggested that the reason the + Bourne Shell decided to call them ‘case-statements’ instead of + ‘switch-statements’ like every other language was that nobody would + remember how to spell ‘hctiws’. + } + + p {- + I also want to allow functions to take named arguments, and to + completely remove the need for newline-escaping, allowing for readable + multiline pipelines. + } + + p {- + In ‘The shell and its crappy handling of whitespace’, the author Mark + Dominus offers an example piece of shell script to rename @code{-*.jpeg} + files to @code{-*.jpg}. 
Take note of all the quoting that is required + in his example in order to properly handle filenames with spaces, as + well as the seemingly useless ‘do’ keyword: + } + + figure { + pre .sh {= m4_fmt_code(suf.bash.gsp) } + } + + p {- + Here is how I envision such a solution in Andy: + } + + figure { + pre .sh {= m4_fmt_code(suf.an.gsp) } + } + + p {- + Notice the complete lack of quotes in the Andy solution, because it + lacks the retardation of automatic word-expansion. The syntax is also + minimal, fast to type, and visually out of the way. C-style braces work + well here; they’re only one character each. We can also completely + remove the ‘do’ keyword, and potentially even make the binding of an + iteration variable optional — I’m not sure about that yet though. + } + + p {- + I’m currently in the process of actively developing Andy, and I will + probably make another post on here soon detailing the current progress + and features of the shell. I hope to soon be able to use Andy as my + primary shell; both for scripting and interactive use. 
+ } + } + + hr{} + + footer { m4_footer } + } +} diff --git a/src/blog/new-sh/proc-diff.bash.gsp b/src/blog/new-sh/proc-diff.bash.gsp new file mode 100644 index 0000000..0711271 --- /dev/null +++ b/src/blog/new-sh/proc-diff.bash.gsp @@ -0,0 +1,3 @@ +@span .cmt {-#!/bin/bash} + +@span .fn {-diff} @span .op {-<(}@span .fn {-cmd1}@span .op {-) <(}@span .fn {-cmd2}@span .op {-)} diff --git a/src/blog/new-sh/proc-diff.sh.gsp b/src/blog/new-sh/proc-diff.sh.gsp new file mode 100644 index 0000000..71a7343 --- /dev/null +++ b/src/blog/new-sh/proc-diff.sh.gsp @@ -0,0 +1,6 @@ +@span .cmt {-#!/bin/sh} + +tmp@span .op {-=$(}@span .fn {-mktemp}@span .op {-)} +@span .fn {-trap} @span .str {-"rm -f $tmp"} EXIT +@span .fn {-cmd2} @span .op {->}$tmp +@span .fn {-cmd1} @span .op {-|} @span .fn {-diff} - $tmp diff --git a/src/blog/new-sh/suf.an.gsp b/src/blog/new-sh/suf.an.gsp new file mode 100644 index 0000000..f4589e7 --- /dev/null +++ b/src/blog/new-sh/suf.an.gsp @@ -0,0 +1,9 @@ +@code .kw {-for} i @code .kw {-in} *.jpeg @code .op {-{} + @code .fn {-mv} $i @code .op {-`{}@code .fn {-suf} $i@code .op {-\}}.jpg +@code .op {-\}} + +@code .cmt {-# maybe have an implicit iteration variable?} + +@code .kw {-for} *.jpeg @code .op {-{} + @code .fn {-mv} $_ @code .op {-`{}@code .fn {-suf} $_@code .op {-\}}.jpg +@code .op {-\}} diff --git a/src/blog/new-sh/suf.bash.gsp b/src/blog/new-sh/suf.bash.gsp new file mode 100644 index 0000000..b300fbe --- /dev/null +++ b/src/blog/new-sh/suf.bash.gsp @@ -0,0 +1,3 @@ +@span .kw {-for} i @span .kw {-in} *.jpeg@span .op {-;} @span .kw {-do} + @span .fn {-mv} @span .str {-"$i"} @span .str {-"}@span .op {-$(}@span .fn {-suf} @span .str {-"$i"}@span .op {-)}@span .str {-"}.jpg @span .cmt {-# three sets of quotes} +@span .kw {-done} diff --git a/src/style.css b/src/style.css index 842e95d..a418a11 100644 --- a/src/style.css +++ b/src/style.css @@ -64,7 +64,7 @@ pre, code, kbd, samp { pre > code { color: var(--fg); } :not(pre) > code { white-space: nowrap; } 
-pre.js { tab-size: 4; } +pre.js, pre.sh { tab-size: 4; } pre.pug, pre.gsp { tab-size: 2; } h1 { @@ -98,6 +98,9 @@ blockquote, blockquote + figcaption { font-style: italic; } margin-block: .5rem; } +.quote cite::before { content: '‘'; } +.quote cite::after { content: '’'; } + figure:not(.quote) figcaption { text-align: center; text-decoration: underline; -- cgit v1.2.3