From e239e8828c99de06df17960ab0682d1d2195fd23 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Mon, 11 Sep 2023 05:38:40 +0200 Subject: Run ‘make check’ and fix various typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/prj/mmv/index.gsp | 24 +- src/prj/mmv/index.gsp.bak | 645 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 657 insertions(+), 12 deletions(-) create mode 100644 src/prj/mmv/index.gsp.bak (limited to 'src/prj/mmv') diff --git a/src/prj/mmv/index.gsp b/src/prj/mmv/index.gsp index 3c2fd97..f049426 100644 --- a/src/prj/mmv/index.gsp +++ b/src/prj/mmv/index.gsp @@ -270,8 +270,8 @@ html lang="en" { } p {- If combined with the @code{--0} flag, then while input will be read - assuming a NUL-byte input-seperator, the encoded input files will be - written to the spawned process newline-seperated. + assuming a NUL-byte input-separator, the encoded input files will be + written to the spawned process newline-separated. } } } @@ -332,7 +332,7 @@ html lang="en" { p {- So we’re getting places, but we aren’t quite there yet. The issue we’re - getting now is that @code{-mmv} recieved 2 input files from the standard + getting now is that @code{-mmv} received 2 input files from the standard input, but @code{-rev} produced 3 output files. Why is that? Well let’s try our hand at a little bit of command-line debugging with @code{-sed}: @@ -365,7 +365,7 @@ html lang="en" { p {- In the @code{-sed} output, we can see that @samp{-$} represents the end of a line, and @samp{-\\000} represents the NUL-byte. All looks good - here, we have two inputs seperated by NUL-bytes. Now let’s try to throw + here, we have two inputs separated by NUL-bytes. Now let’s try to throw in @code{-rev}: } @@ -375,7 +375,7 @@ html lang="en" { p {- Well wouldn’t you know it? 
Since @code{-rev} @em{-also} works with - newline-seperated input, it reversed out NUL-byte seperators and now + newline-separated input, it reversed out NUL-byte separators and now gives us 3 outputs. Luckily the folks over at @em{-util-linux} provided us with the @code{--0} flag here too, so that we can properly handle NUL-delimited input. Combining all of this together we get a final @@ -393,7 +393,7 @@ html lang="en" { use doesn’t support NUL-bytes as nicely as we would like. In these cases, you may want to consider encoding your newline characters into the literal string ‘@code{-\\n}’ and then passing your input - newline-seperated to your given command with the @code{--e} flag. + newline-separated to your given command with the @code{--e} flag. } p {- @@ -415,7 +415,7 @@ html lang="en" { aside { p {- Notice how you still need to pass the @code{--0} flag to @code{-mmv} - know that our inputfiles may have embedded newlines. + know that our input files may have embedded newlines. } } @@ -429,8 +429,8 @@ html lang="en" { } p {- - After you exit your editor, @code{-mmv} will decode all occurances of - ‘@code{-\\n}’ back into a newline, and all occurances of ‘@code{-\\\\}’ + After you exit your editor, @code{-mmv} will decode all occurrences of + ‘@code{-\\n}’ back into a newline, and all occurrences of ‘@code{-\\\\}’ back into a backslash: } @@ -441,11 +441,11 @@ html lang="en" { h2 #i-flag {-Individual Execution} p {- The previous examples are great and all, but what do you do if your - mapping command doesn’t have the concept of an input seperator at all? + mapping command doesn’t have the concept of an input separator at all? This is where the @code{--i} flag comes into play. With the @code{--i} flag we can get @code{-mmv} to execute our mapping command for every input filename. This means that as long as we can work with a complete - buffer, we don’t need to worry about seperators. + buffer, we don’t need to worry about separators. 
} p {- @@ -524,7 +524,7 @@ html lang="en" { } li {- In the case that something goes wrong during execution (perhaps you - tried to move a file to a non-existant directory, or a syscall + tried to move a file to a non-existent directory, or a syscall failed), a backup of your input files is saved automatically by @code{-mmv} for recovery. } diff --git a/src/prj/mmv/index.gsp.bak b/src/prj/mmv/index.gsp.bak new file mode 100644 index 0000000..3c2fd97 --- /dev/null +++ b/src/prj/mmv/index.gsp.bak @@ -0,0 +1,645 @@ +html lang="en" { + head { m4_include(head.gsp) } + body { + header { + div { + h1 {-Moving Files the Right Way} + m4_include(nav.gsp) + } + + figure .quote { + blockquote { + p {= + I think the OpenBSD crowd is a bunch of masturbating monkeys, in + that they make such a big deal about concentrating on security to + the point where they pretty much admit that nothing else matters to + them. + } + } + figcaption {-Linus Torvalds} + } + } + + main { + p { + em {- + You can find the @code{-mmv} git repository over at + @a + href="https://git.sr.ht/~mango/mmv" + target="_blank" + {-sourcehut} + or + @a + href="https://github.com/Mango0x45/mmv" + target="_blank" + {-GitHub}. + } + } + + p {- + NOTE: As of the + @a href="https://git.sr.ht/~mango/mmv/refs/v1.2.0" {-v1.2.0} + release there is now also the @code{-mcp} utility. It behaves the same + as the @code{-mmv} utility but it copies files instead of moving them. + It also doesn’t support the ‘@code{--n}’ flag as it doesn’t need to deal + with backups. 
+ } + + h2 {-Table of Contents} + + ul { + li {a href="#prologue" {-Prologue}} + li {a href="#moving" {-Advanced Moving and Pitfalls}} + li {a href="#mapping" {-Name Mapping with @code{-mmv}}} + li {a href="#newlines" {-Filenames with Embedded Newlines}} + ul { + li {a href="#0-flag" {-The Simple Case}} + li {a href="#e-flag" {-Encoding Newlines}} + } + li {a href="#i-flag" {-Individual Execution}} + li {a href="#safety" {-Safety}} + li {a href="#examples" {-Examples}} + } + + h2 #prologue {-Prologue} + p {- + File moving and renaming is one of the most common tasks we undertake on + the command-line. We basically always do this with the @code{-mv} + utility, and it gets the job done most of the time. Want to rename one + file? Use @code{-mv}! Want to move a bunch of files into a directory? + Use @code{-mv}! How could mv ever go wrong? Well I’m glad you asked! + } + + h2 #moving {-Advanced Moving and Pitfalls} + p {- + Let’s start off nice and simple. You just inherited a C project that + uses the sacrilegious + @a + href="https://en.wikipedia.org/wiki/Camel_case" + target="_blank" + {-camelCase} + naming convention for its files: + } + + figure { + pre { m4_fmt_code(ls-files.sh.gsp) } + } + + p {- + This deeply upsets you, as it upsets me. So you decide you want to + switch all these files to use + @a + href="https://en.wikipedia.org/wiki/Snake_case" + target="_blank" + {-snake_case}, + like a normal person. Well how would you do this? You use @code{-mv}! + This is what you might end up doing: + } + + figure { + pre { m4_fmt_code(manual-mv.sh.gsp) } + } + + p {- + Well… it works I guess, but it’s a pretty shitty way of renaming these + files. Luckily we only had 5, but what if this was a much larger + project with many more files to rename? Things would get tedious. 
So + instead we can use a pipeline for this: + } + + figure { + pre { m4_fmt_code(camel-to-snake-naïve.sh.gsp) } + } + + aside { + p {- + The given example assumes your @code{-sed} implementation supports + ‘@code{-\\L}’ which is a non-standard m4_abbr(GNU) extension. + } + } + + p {- + That works and it gets the job done, but it’s not really ideal is + it? There are a couple of issues with this. + } + + ol { + li { + p {- + You’re writing more complicated code. This has the obvious drawback + of potentially being more error-prone, but also risks taking more + time to write than you’d like as you might have forgotten if + @code{-xargs} actually has an ‘@code{--L}’ option or not (which + would require reading the + @a + href="https://www.man7.org/linux/man-pages/man1/xargs.1.html" + target="_blank" + { + code {-xargs(1)} + } + manual). + } + } + li { + p {- + If you try to rename the file @em{-foo} to @em{-bar} but @em{-bar} + already exists, you end up deleting a file you may not have wanted + to. + } + } + li { + p {- + In a similar vein to the previous point, you need to be very careful + about schemes like renaming the file @em{-a} to @em{-b} and @em{-b} + to @em{-c}. You run the risk of turning @em{-a} into @em{-c} and + losing the file @em{-b} entirely. + } + } + li { + p {- + Moving symbolic links is its own whole can of worms. If a symlink + points to a relative location then you need to make sure you keep + pointing to the right place. If the symlink is absolute however + then you can leave it untouched. But what if the symlink points to + a file that you’re moving as part of your batch move operation? Now + you need to handle that too. + } + } + } + + h2 #mapping {-Name Mapping with @code{-mmv}} + + p {- + What is @code{-mmv}? It’s the solution to all your problems, that’s + what it is! 
@code{-mmv} takes as its argument(s) a utility and that + utility’s arguments and uses that to create a mapping between old and + new filenames — similar to the @code{-map()} function found in many + programming languages. I think to best convey how the tool functions, I + should provide an example. Let’s try to do the same thing we did + previously where we tried to turn camelCase files to snake_case, but + using @code{-mmv}: + } + + figure { + pre { m4_fmt_code(camel-to-snake-smart.sh.gsp) } + } + + p {-Let me break down how this works.} + + p {- + @code{-mmv} starts by reading a series of filenames separated by + newlines from the standard input. Yes, sometimes filenames have + newlines in them and yes there is a way to handle them but I shall get + to that later. The filenames that @code{-mmv} reads from the standard + input will be referred to as the @em{-input files}. Once all the input + files have been read, the utility specified by the arguments is spawned; + in this case that would be @code{-sed} with the argument + @code{-'s/[A-Z]/\\L_&/g'}. The input files are then piped into + @code{-sed} the exact same way that they would have been if we ran the + above commands without @code{-mmv}, and the output of @code{-sed} then + forms what will be referred to as the @em{-output files}. Once a + complete list of output files is accumulated, each input file gets + renamed to its corresponding output file. + } + + p {- + Let’s look at a simpler example. Say we want to rename 2 files in the + current directory to use lowercase letters, we could use the following + command: + } + + figure { + pre { m4_fmt_code(mmv-tr.sh.gsp) } + } + + p {- + In the above example @code{-mmv} reads 2 lines from standard input, + those being @em{-LICENSE} and @em{-README}. Those are our 2 input files + now. The @code{-tr} utility is then spawned and the input files are + piped into it. 
We can simulate this in the shell: + } + + figure { + pre { m4_fmt_code(tr.sh.gsp) } + } + + p {- + As you can see above, @code{-tr} has produced 2 lines of output; these + are our 2 output files. Since we now have our 2 input files and 2 + output files, @code{-mmv} can go ahead and rename the files. In this + case it will rename @em{-LICENSE} to @em{-license} and @em{-README} to + @em{-readme}. For some examples, check the @a href="#examples" + {-examples} section of this page down below. + } + + h2 #newlines {-Filenames with Embedded Newlines} + + p {- + People are retarded, and as a result we have filenames with newlines in + them. All it would have taken to solve this issue for everyone was for + literally @strong{-anybody} during the early UNIX days to go “@em{-hey, + this is a bad idea!}”, but alas, we must deal with this. Newlines are + of course not the only special characters filenames can contain, but + they are the single most infuriating to deal with; the UNIX utilities + all being line-oriented really doesn’t work well with these files. + } + + p {- + So how does @code{-mmv} deal with special characters, and newlines in + particular? Well it does so by providing the user with the @code{--0} + and @code{--e} flags: + } + + dl { + dt { code{--0} } + dd { + p {- + Tell @code{-mmv} to expect its input to not be separated by newlines + (‘@code{-\\n}’), but by NUL bytes (‘@code{-\\0}’). NUL bytes are + the only characters not allowed in filenames besides forward + slashes, so they are an obvious choice for an alternative separator. + } + } + dt { code{--e} } + dd { + p {- + Encode newlines in filenames before passing them to the provided + utility. Newline characters are replaced by the literal string + ‘@code{-\\n}’ and backslashes by the literal string ‘@code{-\\\\}’. + After processing, the resulting output is decoded again. 
+ } + p {- + If combined with the @code{--0} flag, then while input will be read + assuming a NUL-byte input-seperator, the encoded input files will be + written to the spawned process newline-seperated. + } + } + } + + h3 id="0-flag" {-The Simple Case} + + p {- + In order to better understand these flags and how they work let’s go + though another example. We have 2 files — one with and one without an + embedded newline — and our goal is to simply reverse these filenames. + In this example I am going to be displaying newlines in filenames with + the “@code{-$'\\n'}” syntax as this is how my shell displays embedded + newlines. + } + + p {- + We can start by just trying to naïvely pass these 2 files to @code{-mmv} + and use @code{-rev} to reverse the names, but this doesn’t work: + } + + figure { + pre { m4_fmt_code(mmv-rev.sh.gsp) } + } + + p {- + The reason this doesn’t work is because due to the line-oriented nature + of @code{-ls} and @code{-rev}, we are actually trying to rename the + files @em{-foo}, @em{-bar}, and @em{-baz} to the new filenames + @em{-zab}, @em{-rab}, and @em{-oof}. As can be seen in the following + diagram, the embedded newline is causing our input to be ambiguous and + @code{-mmv} can’t reliably proceed anymore @x-ref{-1}: + } + + figure { + object data="conflict.svg" type="image/svg+xml" {-} + } + + aside { + p data-ref="1" {- + The reason you get a cryptic “file not found” error message is because + @code{-mmv} tries to assert that all the input files actually exist + before doing anything. Since “foo” isn’t a real file, we error out. + } + } + + p {- + The first thing we need to do in order to proceed is to pass the + @code{--0} flag to @code{-mmv}. This will tell @code{-mmv} that we want + to use the NUL-byte as our input separator and not the newline. We also + need @code{-ls} to actually provide us with the filenames delimited by + NUL-bytes. 
Luckily m4_abbr(GNU) @code{-ls} gives us the @code{---zero} + flag to do just that: + } + + figure { + pre { m4_fmt_code(mmv-rev-zero.sh.gsp) } + } + + p {- + So we’re getting places, but we aren’t quite there yet. The issue we’re + getting now is that @code{-mmv} recieved 2 input files from the standard + input, but @code{-rev} produced 3 output files. Why is that? Well + let’s try our hand at a little bit of command-line debugging with + @code{-sed}: + } + + figure { + pre { m4_fmt_code(sed-debugging.sh.gsp) } + } + + p {- + If you aren’t quite sure what the above is doing, here’s a quick + summary: + } + + ul { + li {- + The @code{--U} flag given to @code{-ls} tells it not to sort our + output. This is purely just to keep this example clear to the reader. + } + li {- + The @code{--n} flag given to @code{-sed} tells it not to print the + input line automatically at the end of the provided script. + } + li {- + The @code{-l} command in @code{-sed} prints the current input in a + “visually unambiguous form”. + } + } + + p {- + In the @code{-sed} output, we can see that @samp{-$} represents the end + of a line, and @samp{-\\000} represents the NUL-byte. All looks good + here, we have two inputs seperated by NUL-bytes. Now let’s try to throw + in @code{-rev}: + } + + figure { + pre { m4_fmt_code(sed-debugging-rev.sh.gsp) } + } + + p {- + Well wouldn’t you know it? Since @code{-rev} @em{-also} works with + newline-seperated input, it reversed out NUL-byte seperators and now + gives us 3 outputs. Luckily the folks over at @em{-util-linux} provided + us with the @code{--0} flag here too, so that we can properly handle + NUL-delimited input. Combining all of this together we get a final + working product: + } + + figure { + pre { m4_fmt_code(reverse-embedded-newline.sh.gsp) } + } + + h3 #e-flag {-Encoding Newlines} + + p {- + Sometimes we want to rename a bunch of files, but the command we want to + use doesn’t support NUL-bytes as nicely as we would like. 
In these + cases, you may want to consider encoding your newline characters into + the literal string ‘@code{-\\n}’ and then passing your input + newline-seperated to your given command with the @code{--e} flag. + } + + p {- + For a real-world example, perhaps you want to edit some filenames in + vim, or whatever other editor you use. Well we can do this incredibly + easily with the @code{-vipe} utility from the + @a href="https://joeyh.name/code/moreutils/" {-moreutils} + collection. The @code{-vipe} command simply reads input from the + standard input, opens it up in your editor, and then prints the + resulting output to the standard output; perfect for @code{-mmv}! We do + not really want to deal with NUL-bytes in our text-editor though, so + let’s just encode our newlines: + } + + figure { + pre { m4_fmt_code(vipe.sh.gsp) } + } + + aside { + p {- + Notice how you still need to pass the @code{--0} flag to @code{-mmv} + know that our inputfiles may have embedded newlines. + } + } + + p {- + When running the above code example, you will see the following in your + editor: + } + + figure { + pre { m4_fmt_code(vim.gsp) } + } + + p {- + After you exit your editor, @code{-mmv} will decode all occurances of + ‘@code{-\\n}’ back into a newline, and all occurances of ‘@code{-\\\\}’ + back into a backslash: + } + + figure { + object data="e-flag.svg" type="image/svg+xml" {-} + } + + h2 #i-flag {-Individual Execution} + p {- + The previous examples are great and all, but what do you do if your + mapping command doesn’t have the concept of an input seperator at all? + This is where the @code{--i} flag comes into play. With the @code{--i} + flag we can get @code{-mmv} to execute our mapping command for every + input filename. This means that as long as we can work with a complete + buffer, we don’t need to worry about seperators. + } + + p {- + To be honest, I cannot really think of any situation where you might + actually need to do this. 
If you can think of one, please @a + href="mailto:mail@thomasvoss.com" {-email me} and I’ll update the + example on this page. Regardless, let’s imagine that we wanted to + rename some files so that their filenames are replaced with their + filename + @a + href="https://en.wikipedia.org/wiki/SHA-1" + target="_blank" + {-m4_abbr(SHA)-1 hash}. + On Linux we have the @code{-sha1sum} program which reads input from the + standard input and outputs the m4_abbr(SHA)-1 hash. This is how we + would use it with @code{-mmv}: + } + + figure { + pre { m4_fmt_code(sha1sum-long-example.sh.gsp) } + } + + p {- + Another approach is to invoke @code{-mmv} twice: + } + + figure { + pre { m4_fmt_code(sha1sum-short-example.sh.gsp) } + } + + p {- + If you are confused about why we need to make a call to @code{-awk}, + it’s because the @code{-sha1sum} program outputs 2 columns of data. The + first column is our hash and the second column is the filename where the + to-be-hashed data was read from. We don’t want the second column. + } + + p {- + Unlike in previous examples where one process was spawned to map all our + filenames, with the @code{--i} flag we are spawning a new instance for + each filename. If you struggle to visualize this, perhaps the following + diagrams help: + } + + figure { + figcaption {-Invoking @code{-mmv} without @code{--i}} + object data="without-i-flag.svg" type="image/svg+xml" {-} + } + + figure { + figcaption {-Invoking @code{-mmv} with @code{--i}} + object data="with-i-flag.svg" type="image/svg+xml" {-} + } + + h2 #safety {-Safety} + p {- + When compared to the standard @code{-for f in *; do mv $f …; done} or + @code{-ls | … | xargs -L2 mv} constructs, @code{-mmv} is significantly + more safe to use. These are some of the safety features that are built + into the tool: + } + + ol { + li {- + If the number of input- and output files differs, execution is aborted + before making any changes. 
+ } + li {- + If an input file is renamed to the name of another input file, the + second input file is not lost (i.e. you can rename @em{-a} to @em{-b} + and @em{-b} to @em{-a} with no problem). + } + li {- + All input files must be unique and all output files must be unique. + Otherwise execution is aborted before making any changes. + } + li {- + In the case that something goes wrong during execution (perhaps you + tried to move a file to a non-existant directory, or a syscall + failed), a backup of your input files is saved automatically by + @code{-mmv} for recovery. + } + } + + p {- + Due to the way @code{-mmv} handles #2, when things do go wrong you may + find that all of your input files have disappeared. Don’t worry though, + @code{-mmv} takes a backup of your code before doing anything. If you + run @code{-mmv} with the @code{--v} option for verbose output, you’ll + notice it backing up your stuff in the @code{-$XDG_CACHE_DIR} directory: + } + + figure { + pre { m4_fmt_code(mmv-verbose.sh.gsp) } + } + + p {- + Upon successful execution the @code{-$XDG_CACHE_DIR/mmv/TIMESTAMP} + directory will be automatically removed, but it remains when things go + wrong so that you can recover any missing data. The names of the + backup-subdirectories in the @code{-$XDG_CACHE_DIR/mmv} directory are + timestamps of when the directories were created. This should make it + easier for you to figure out which directory you need to recover if you + happen to have multiple of these. + } + + h2 #examples {-Examples} + + aside { + p {- + All of these examples are ripped straight from the @code{-mmv(1)} + manual page. If you installed @code{-mmv} through a package manager or + via @code{-make install} then you should have the manual installed on + your system. 
+ } + } + + p {-Swap the files @em{-foo} and @em{-bar}:} + figure { + pre { m4_fmt_code(examples/swap.sh.gsp) } + } + + p {- + Rename all files in the current directory to use hyphens (‘-’) instead + of spaces: + } + figure { + pre { m4_fmt_code(examples/hyphens.sh.gsp) } + } + + p {- + Rename a given list of movies to use lowercase letters and hyphens + instead of uppercase letters and spaces, and number them so that they’re + properly ordered in globs (e.g. rename @em{-The Return of the King.mp4} + to @em{-02-the-return-of-the-king.mp4}): + } + figure { + pre { m4_fmt_code(examples/number.sh.gsp) } + } + + p {- + Rename files interactively in your editor while encoding newlines into + the literal string ‘@code{-\\n}’, making use of + @code { + a + href="https://linux.die.net/man/1/vipe" + target="_blank" + {-vipe(1)} + } + from @em{-moreutils}: + } + figure { + pre { m4_fmt_code(examples/vipe.sh.gsp) } + } + + p {- + Rename all C source code- and header files in a git repository + to use snake_case instead of camelCase using + the m4_abbr(GNU) + @code { + a + href="https://www.man7.org/linux/man-pages/man1/sed.1.html" + target="_blank" + {-sed(1)} + } + ‘@code{-\\L}’ extension: + } + figure { + pre { m4_fmt_code(examples/camel-to-snake.sh.gsp) } + } + + p {- + Lowercase all filenames within a directory hierarchy which may contain + newline characters: + } + figure { + pre { m4_fmt_code(examples/lowercase.sh.gsp) } + } + + p {- + Map filenames which may contain newlines in the current directory with + the command ‘@code{-cmd}’, which itself does not support nul-byte + separated entries. This only works assuming your mapping doesn’t + require any context outside of the given input filename (for example, + you would not be able to number your files as this requires knowledge of + the input file’s position in the input list): + } + figure { + pre { m4_fmt_code(examples/i-flag.sh.gsp) } + } + } + + hr{} + + footer { m4_footer } + } +} -- cgit v1.2.3