summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2024-01-24 21:53:27 +0100
committerThomas Voss <mail@thomasvoss.com> 2024-01-24 21:53:27 +0100
commit7a513031822010612ef1fd0104cb339854d741d3 (patch)
treed5ee5351d355cc06b4702a2a8813f9506aa2dfb8
parent754154a71af3f753db758744fe52e8409ec5011f (diff)
Add new blog post
-rw-r--r--src/blog/grab/answer.sh.gsp2
-rw-r--r--src/blog/grab/example-1.sh.gsp16
-rw-r--r--src/blog/grab/example-2.sh.gsp4
-rw-r--r--src/blog/grab/example-3.sh.gsp4
-rw-r--r--src/blog/grab/example.vue.gsp7
-rw-r--r--src/blog/grab/g.pat.gsp1
-rw-r--r--src/blog/grab/grep.sh.gsp1
-rw-r--r--src/blog/grab/index.gsp276
-rw-r--r--src/blog/grab/x.pat.gsp1
-rw-r--r--src/blog/index.gsp1
-rw-r--r--src/style.css17
11 files changed, 328 insertions, 2 deletions
diff --git a/src/blog/grab/answer.sh.gsp b/src/blog/grab/answer.sh.gsp
new file mode 100644
index 0000000..1f634d7
--- /dev/null
+++ b/src/blog/grab/answer.sh.gsp
@@ -0,0 +1,2 @@
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .fn {-grab} @span .str {-'x/<date-input.*?>/ g/‌\\bbrowser\\b/ h//'} foo
+…
diff --git a/src/blog/grab/example-1.sh.gsp b/src/blog/grab/example-1.sh.gsp
new file mode 100644
index 0000000..9944003
--- /dev/null
+++ b/src/blog/grab/example-1.sh.gsp
@@ -0,0 +1,16 @@
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .fn {-cat} foo
+Hello world, this is
+a paragraph.
+
+This is also a paragraph
+but it contains doubled
+doubled words.
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .fn {-grab} @span .str {-'x/[^\\n].‌+?(?=\\n\\n|$)/ g/(\\b\\w+\\b)\\s+\\1/'} foo
+This is also a paragraph
+but it contains doubled
+doubled words.
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .cmt {-# Just like grep, you can display match positions}
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .fn {-grab} -f @span .str {-'…'} foo
+@span .grab-file {-foo}@span .grab-sep {-:}@span .grab-pos {-4}@span .grab-sep {-:}@span .grab-pos {-1}@span .grab-sep {-:}This is also a paragraph
+but it contains doubled
+doubled words.
diff --git a/src/blog/grab/example-2.sh.gsp b/src/blog/grab/example-2.sh.gsp
new file mode 100644
index 0000000..5e55a4d
--- /dev/null
+++ b/src/blog/grab/example-2.sh.gsp
@@ -0,0 +1,4 @@
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .fn {-grab} @span .str {-'x/[^\\n].‌+?(?=\\n\\n|$)/ g/(\\b\\w+\\b)\\s+\\1/ h/(\\b\\w+\\b)\\s+\\1/'} foo
+This is also a paragraph
+but it contains @span .grab-hl {-doubled}
+@span .grab-hl {-doubled} words.
diff --git a/src/blog/grab/example-3.sh.gsp b/src/blog/grab/example-3.sh.gsp
new file mode 100644
index 0000000..d6076b9
--- /dev/null
+++ b/src/blog/grab/example-3.sh.gsp
@@ -0,0 +1,4 @@
+@span .p {-thomas} @span .pp {-~} @span .p {-〉}@span .fn {-grab} @span .str {-'x/[^\\n].‌+?(?=\\n\\n|$)/ g/(\\b\\w+\\b)\\s+\\1/ h//'} foo
+This is also a paragraph
+but it contains @span .grab-hl {-doubled}
+@span .grab-hl {-doubled} words.
diff --git a/src/blog/grab/example.vue.gsp b/src/blog/grab/example.vue.gsp
new file mode 100644
index 0000000..58975c6
--- /dev/null
+++ b/src/blog/grab/example.vue.gsp
@@ -0,0 +1,7 @@
+<@span .fn {-date-input}
+ @span .var {-v-model}@span .op {-=}@span .str {-"date"}
+ @span .var {-class}@span .op {-=}@span .str {-"foo bar"}
+ @span .var {-:browser}@span .op {-=}@span .str {-"true"}
+ @span .var {-:placeholder}@span .op {-=}@span .str {-"today"}
+ @span .var {-required}
+/>
diff --git a/src/blog/grab/g.pat.gsp b/src/blog/grab/g.pat.gsp
new file mode 100644
index 0000000..cebea99
--- /dev/null
+++ b/src/blog/grab/g.pat.gsp
@@ -0,0 +1 @@
+@span .fn {-x}@span .op {-/}[^\\n].‌+?(?=\\n\\n|$)@span .op {-/} @span .fn {-g}@span .op {-/}(\\b\\w+\\b)\\s+\\1@span .op {-/}
diff --git a/src/blog/grab/grep.sh.gsp b/src/blog/grab/grep.sh.gsp
new file mode 100644
index 0000000..1f363a7
--- /dev/null
+++ b/src/blog/grab/grep.sh.gsp
@@ -0,0 +1 @@
+$ @span .fn {-git} grep @span .str {-'<date-input.*browser.*‌>'}
diff --git a/src/blog/grab/index.gsp b/src/blog/grab/index.gsp
new file mode 100644
index 0000000..b9d0d84
--- /dev/null
+++ b/src/blog/grab/index.gsp
@@ -0,0 +1,276 @@
+html lang="en" {
+ head { m4_include(head.gsp) }
+ body {
+ header {
+ div {
+ h1 {-Reinvent The Wheel!}
+ m4_include(nav.gsp)
+ }
+
+ figure .quote {
+ blockquote {
+ p {=
+ You have to do what must be done. Nobody is going to ask you, “why
+ didn’t you make it?”. It’s either do it or not. Do not think about
+ what you’re feeling, do it no matter what.
+ }
+ }
+ figcaption {-Haroon Khan}
+ }
+ }
+
+ main {
+ h2 #story {-Story of a Software Engineer}
+ p {-
+ It was your average Wednesday afternoon, and I was working my job. My
+ specific task on this day was quite simple: document our custom Vue
+ components that make up most of our products’ m4_abbr(UI).
+ }
+
+ p {-
+ This should be a relatively easy task and for the most part it was, but
+ I had an issue. Some of these components had some @em{-really} obscure
+ properties that could influence their behavior, and seeing as much of
+ the codebase was written 10 years ago by utter idiots, the code
+ implementing these properties is @em{-really} hard to read.
+ }
+
+ p {-
+ I decided that instead of trying to study the @em{-definitions} of
+ these properties, it would be quite a bit easier to study the
+ @em{-usage} of these properties. But how do I find them? Our codebase
+ is hundreds of thousands of lines of code, and these properties have
+ very generic names such as ‘@em{-browser}’. Additionally while the
+ components are easy to search for, they’re used in hundreds of places
+ and such properties may only be used once or twice.
+ }
+
+ p {-
+ The solution? I thought it would be the trusty tool in every hacker’s
+ toolbelt: @code{-grep}.
+ }
+
+ h2 #downfall {-The Downfall of Grep}
+ p {-
+ I thought that @code{-grep} would be my saviour. The tool that would
+ answer the call to find me the usages I so desired. So I whipped that
+ baby out and went straight to work:
+ }
+
+ figure {
+ pre {= m4_fmt_code(grep.sh.gsp) }
+ }
+
+ p {-
+ You can probably tell from the fact I’m writing this post that this did
+ not work. If you’ve ever worked with Vue or something similar, you
+ might even be able to figure out why. For those unfamiliar with the
+ frontend (you’re a treasure that must be preserved), allow me to show
+ you something that is all too common in a Vue codebase:
+ }
+
+ figure {
+ pre .vue {= m4_fmt_code(example.vue.gsp) }
+ }
+
+ p {-
+ The issue here is clear: the property we’re searching for (‘browser’) is
+ on an entirely different line from the component we’re searching for
+ (‘@code{-<date-input>}’). It’s not enough to search for just the
+ component because it’s used everywhere but only a few rare usages
+ interest me, and it’s not enough to search for just the attribute
+ because many different components have attributes of the same name (and
+ @em{-no} they don’t have the same behavior; the codebase is shit).
+ }
+
+ p {-
+ What I need is a tool that will let me search for patterns that span
+ multiple lines.
+ }
+
+ h2 #grab {-Introducing Grab}
+ figure .quote {
+ blockquote {
+ p {=
+ The current UNIX® text processing tools are weakened by the built-in
+ concept of a line. There is a simple notation that can describe the
+ ‘shape’ of files when the typical array-of-lines picture is
+ inadequate. That notation is regular expressions. Using regular
+ expressions to describe the structure in addition to the contents of
+ files has interesting applications, and yields elegant methods for
+ dealing with some problems the current tools handle clumsily. When
+ operations using these expressions are composed, the result is
+ reminiscent of shell pipelines.
+ }
+ }
+ figcaption {-Rob Pike}
+ }
+
+ p {-
+ That quote is from the abstract of @cite {-Structural Regular
+ Expressions}, a paper written by the one and only Rob Pike back in
+ 1987. It describes an idea by which we stop assuming that all data is
+ organized in lines, and instead use regular expressions to define the
+ shapes comprising our data.
+ }
+
+ p {-
+ I actually had read this paper some years ago and it had always sat in
+ the back of my mind. I had actually toyed around in the past with an
+ implementation of @code{-grep} that wasn’t strictly line-oriented, but
+ it was very bare-bones, and lacked basic faculties such as reporting the
+ positions of matches, something I desperately needed.
+ }
+
+ p {-
+ So over the following few days I made major changes, rewrote lots of the
+ code, and overall turned my tool — @code{-grab} — into a staple part of
+ my hacker’s toolbelt.
+ }
+
+ h2 #how {-How Grab Finds Text}
+ p {-
+ If you’re familiar with the UNIX environment, you’re probably used to
+ querying text with tools such as @code{-sed} and @code{-awk} using
+ regular expressions. These are the same regular expressions we as
+ programmers all know and love, but with one important — yet often
+ overlooked — characteristic: you cannot match the newline.
+ }
+
+ p {-
+ The @code{-grab} utility moves away from this limiting paradigm; the
+ newline is treated no differently from any other character you want
+ to match. Want to match an entire paragraph of text? The pattern is as
+ simple as ‘@code{-[^\\n].‌+?(?=\\n\\n|$)}’. It may look
+ complicated if you’re new to regular expressions — m4_abbr(PCRE)s to be
+ specific — but it’s really quite simple. You just match a non-newline
+ character, and then as few characters as necessary until reaching either
+ a double newline, or the end of input.
+ }
+
+ p {-
+ On its own this isn’t too amazing though. The great thing about
+ @code{-grep} is that it doesn’t just show you matches, but it shows you
+ them in the context of a complete line. @code{-grab} solves this in the
+ same way described in Rob Pike’s paper: chaining operations.
+ }
+
+ p {-
+ Say we want to iterate not over lines but over paragraphs. We can use
+ the following @em{-pattern}:
+ }
+
+ figure {
+ pre {= m4_fmt_code(x.pat.gsp) }
+ }
+
+ p {-
+ Here we’re using the ‘x’ operator. It iterates over all occurrences of
+ the pattern. In this case we’re iterating over all paragraphs in our
+ input. Maybe we want to see all paragraphs which contain doubled words
+ (for example: ‘the the’), a common typo found in text files. For this
+ we can use the ‘g’ operator:
+ }
+
+ figure {
+ pre {= m4_fmt_code(g.pat.gsp) }
+ }
+
+ p {-
+ The fundamental difference between the two operators is that the
+ ‘x’ operator specifies the structure to iterate over. In the context of
+ @code{-grep} these are lines, but in @code{-grab} they can be whatever
+ you want. The ‘g’ operator on the other hand doesn’t modify the
+ structure of the matches returned to you at all; it simply acts as a
+ filter selecting matches which match the given regular expression.
+ }
+
+ p {-
+ Here’s an interactive example:
+ }
+
+ figure {
+ pre {= m4_fmt_code(example-1.sh.gsp) }
+ }
+
+ p {-
+ This is almost perfect; there’s just one bit missing. In my interactive
+ example I’ve shown how you can use the power of @code{-grab} to find
+ paragraphs in your files containing doubled words. This is really handy
+ if you find yourself writing websites, documentation, or other long-form
+ written content.
+ }
+
+ p {-
+ Given my example though, how easily were you able to spot the doubled
+ words? It probably didn’t stick out to you right away, unlike if it had
+ been highlighted by some bright flashy color. It is for this reason
+ that the ‘h’ operator exists. This operator is unique in that it does
+ not change the given selections at all. Any matches made by previous
+ occurrences of ‘x’ and ‘g’ will be displayed the same with and without
+ the use of ‘h’.
+ }
+
+ p {-
+ The ‘h’ operator is purely for the user. By using this operator you
+ can specify a pattern for which matching text must be @em{-highlighted}.
+ Let’s apply it to the previous example and see how the doubled words are
+ made instantly obvious to the user:
+ }
+
+ figure {
+ pre {= m4_fmt_code(example-2.sh.gsp) }
+ }
+
+ p {-
+ There is an obvious problem here: the duplication of the regular
+ expression provided to the ‘g’ and ‘h’ operators. It is @em{-extremely}
+ common that you will want to highlight text that was just matched by a
+ ‘g’ operator. Like, @em{-really} common. So common in fact that the
+ ‘h’ operator supports a shorthand syntax for this exact situation:
+ @code {-h//}. Giving an empty regular expression as an argument to an
+ operator is illegal with the exception of the ‘h’ operator. When this
+ operator is given an empty argument, it assumes the regular expression
+ of the previous operator:
+ }
+
+ figure {
+ pre {= m4_fmt_code(example-3.sh.gsp) }
+ }
+
+ h2 #final {-Final Solution}
+ p {-
+ So… what was the final solution to my problem? How did I find all the
+ @code{-<date-input>} tags in my job’s codebase that were passed the
+ ‘browser’ attribute? Well here’s how:
+ }
+
+ figure {
+ pre {= m4_fmt_code(answer.sh.gsp) }
+ }
+
+ p {-
+ Quick, simple, and elegant. Just the way I like it!
+ }
+
+ h2 #more {-Additional Operators}
+ p {-
+ Here I’ve shown you the 3 main operators: ‘x’, ‘g’, and ‘h’. These are
+ not all however! Each operator also has a capital variant (‘X’, ‘G’,
+ ‘H’) which behaves the same but instead of working on text that matches
+ the given pattern, these operators match on text which @em{-doesn’t}
+ match the given pattern.
+ }
+
+ p {-
+ These operators allow for better pattern matching. For example a
+ pattern to match all numbers which contain a ‘3’ but which aren’t ‘1337’
+ could be written as @code{-x/[0-9]+/ g/3/ G/^1337$/}.
+ }
+ }
+
+ hr{}
+
+ footer { m4_footer }
+ }
+}
diff --git a/src/blog/grab/x.pat.gsp b/src/blog/grab/x.pat.gsp
new file mode 100644
index 0000000..1ac0e34
--- /dev/null
+++ b/src/blog/grab/x.pat.gsp
@@ -0,0 +1 @@
+@span .fn {-x}@span .op {-/}[^\\n].‌+?(?=\\n\\n|$)@span .op {-/}
diff --git a/src/blog/index.gsp b/src/blog/index.gsp
index 0673da3..fce8d08 100644
--- a/src/blog/index.gsp
+++ b/src/blog/index.gsp
@@ -28,6 +28,7 @@ html lang="en" {
p {-Posts:}
ul {
+ m4_article(grab, {-Making Grep Better})
m4_article(andy-val, {-Values in Andy})
m4_article(new-sh, {-Making a New Shell})
m4_article(extend, {-Extensible Scripting})
diff --git a/src/style.css b/src/style.css
index 19a32ab..3941394 100644
--- a/src/style.css
+++ b/src/style.css
@@ -50,8 +50,15 @@ pre, code, kbd, samp {
pre > code { color: var(--fg); }
:not(pre) > code { white-space: nowrap; }
-pre.js, pre.sh { tab-size: 4; }
-pre.pug, pre.gsp { tab-size: 2; }
+pre.js,
+pre.sh {
+ tab-size: 4;
+}
+pre.gsp,
+pre.pug,
+pre.vue {
+ tab-size: 2;
+}
h1 {
font-size: 1.8rem;
@@ -200,6 +207,11 @@ dl {
.gsp-val { color: var(--aqua); }
.gsp-op { color: var(--lesser); }
+.grab-file { color: #ECABF7; }
+.grab-hl { color: #FF4D51; font-weight: bold; }
+.grab-pos { color: #47B413; }
+.grab-sep { color: #24DFC4; }
+
.article {
display: flex;
justify-content: space-between;
@@ -264,6 +276,7 @@ dl {
abbr.led::before { content: 'Light-Emitting Diode'; }
abbr.nas::before { content: 'Network Attached Storage'; }
abbr.os::before { content: 'Operating System'; }
+ abbr.pcre::before { content: 'Perl Compatible Regular Expression'; }
abbr.pdf::before { content: 'Portable Document Format'; }
abbr.posix::before { content: 'Portable Operating System Interface'; }
abbr.qr::before { content: 'Quick Response'; }