summaryrefslogblamecommitdiffhomepage
path: root/src/prj/mmv/index.gsp
blob: 185d93e5539b6d54786e033f885563e3c387a192 (plain) (tree)



































































                                                                                                                   
                                                                                                        












                                                                                                       
                                                                     












                                                                                                       
                                                                      








                                                                                                          
                                                                                  






































































                                                                                                                      
                                                                                 










                                                                                                        
                                                                                                 












                                                                                                        
                                                                   








                                                                                                        
                                                               














                                                                                                       
                                                                                                         





























                                                                                                                           
                                                                                                                    









                                                                                                        
                                                                                                         







                                                                                                         
                                                                    















                                                                                                         
                                                                                                                  
                                                                                                           
                                                                                                                  











                                                                                                        
                                                                   


                                                                                                              
                                                                                                        




                                                                                                   
                                                                          
















                                                                                                              
                                                                        




                                                                                                       
                                                                                                          


                                               
                                                                              


                                                                                                     
                                                                                                    





                                                                                                        
                                                                                     







                                                                                                        
                                                                                                 













                                                                                                        
                                                                 



                                                                                                            
                                                                                             







                                                                                                       
                                                             

                            
                                                                                                                








                                                                                                   
                                                                                                        

                                                                                                       
                                                                                  

















                                                                                                       
                                                                                 





                                                                                
                                                                                  
















































                                                                                                              
                                                                                                      












                                                                                                          
                                                                        























                                                                                                              
                                                                          





                                                                                                          
                                                                             







                                                                                                          
                                                                            












                                                                                                     
                                                                          













                                                                                                           
                                                                                    





                                                                                                      
                                                                               









                                                                                                        
                                                                            






                                    
html lang="en" {
	head { m4_include(head.gsp) }
	body {
		header {
			div {
				h1 {-Moving Files the Right Way}
				m4_include(nav.gsp)
			}

			figure .quote {
				blockquote {
					p {=
						I think the OpenBSD crowd is a bunch of masturbating monkeys, in
						that they make such a big deal about concentrating on security to
						the point where they pretty much admit that nothing else matters to
						them.
					}
				}
				figcaption {-Linus Torvalds}
			}
		}

		main {
			p {
				em {-
					You can find the @code{-mmv} git repository over at
					@a
						href="https://git.sr.ht/~mango/mmv"
						target="_blank"
					{-sourcehut}
					or
					@a
						href="https://github.com/Mango0x45/mmv"
					  target="_blank"
					{-GitHub}.
				}
			}

			p {-
				NOTE: As of the
				@a href="https://git.sr.ht/~mango/mmv/refs/v1.2.0" {-v1.2.0}
				release there is now also the @code{-mcp} utility.  It behaves the same
				as the @code{-mmv} utility but it copies files instead of moving them.
				It also doesn’t support the ‘@code{--n}’ flag as it doesn’t need to deal
				with backups.
			}

			h2 {-Table of Contents}

			ul {
				li {a href="#prologue" {-Prologue}}
				li {a href="#moving" {-Advanced Moving and Pitfalls}}
				li {a href="#mapping" {-Name Mapping with @code{-mmv}}}
				li {a href="#newlines" {-Filenames with Embedded Newlines}}
				ul {
					li {a href="#0-flag" {-The Simple Case}}
					li {a href="#e-flag" {-Encoding Newlines}}
				}
				li {a href="#i-flag" {-Individual Execution}}
				li {a href="#safety" {-Safety}}
				li {a href="#examples" {-Examples}}
			}
			
			h2 #prologue {-Prologue}
			p {-
				File moving and renaming is one of the most common tasks we undertake on
				the command-line.  We basically always do this with the @code{-mv}
				utility, and it gets the job done most of the time.  Want to rename one
				file?  Use @code{-mv}!  Want to move a bunch of files into a directory?
				Use @code{-mv}! How could @code{-mv} ever go wrong?  Well I’m glad you
				asked!
			}

			h2 #moving {-Advanced Moving and Pitfalls}
			p {-
				Let’s start off nice and simple.  You just inherited a C project that
				uses the sacrilegious
				@a
					href="https://en.wikipedia.org/wiki/Camel_case"
					target="_blank"
				{-camelCase}
				naming convention for its files:
			}

			figure {
				pre {= m4_fmt_code(ls-files.sh.gsp) }
			}

			p {-
				This deeply upsets you, as it upsets me.  So you decide you want to
				switch all these files to use
				@a
					href="https://en.wikipedia.org/wiki/Snake_case"
					target="_blank"
				{-snake_case},
				like a normal person.  Well how would you do this?  You use @code{-mv}!
				This is what you might end up doing:
			}

			figure {
				pre {= m4_fmt_code(manual-mv.sh.gsp) }
			}

			p {-
				Well… it works I guess, but it’s a pretty shitty way of renaming these
				files.  Luckily we only had 5, but what if this was a much larger
				project with many more files to rename?  Things would get tedious.  So
				instead we can use a pipeline for this:
			}

			figure {
				pre {= m4_fmt_code(camel-to-snake-naïve.sh.gsp) }
			}

			aside {
				p {-
					The given example assumes your @code{-sed} implementation supports
					‘@code{-\\L}’ which is a non-standard m4_abbr(GNU) extension.
				}
			}

			p {-
				That works and it gets the job done, but it’s not really ideal is
				it?  There are a couple of issues with this.
			}

			ol {
				li {
					p {-
						You’re writing more complicated code.  This has the obvious drawback
						of potentially being more error-prone, but also risks taking more
						time to write than you’d like as you might have forgotten if
						@code{-xargs} actually has an ‘@code{--L}’ option or not (which
						would require reading the
						@a
							href="https://www.man7.org/linux/man-pages/man1/xargs.1.html"
							target="_blank"
						{
							code {-xargs(1)}
						}
						manual).
					}
				}
				li {
					p {-
						If you try to rename the file @em{-foo} to @em{-bar} but @em{-bar}
						already exists, you end up deleting a file you may not have wanted
						to.
					}
				}
				li {
					p {-
						In a similar vein to the previous point, you need to be very careful
						about schemes like renaming the file @em{-a} to @em{-b} and @em{-b}
						to @em{-c}.  You run the risk of turning @em{-a} into @em{-c} and
						losing the file @em{-b} entirely.
					}
				}
				li {
					p {-
						Moving symbolic links is its own whole can of worms.  If a symlink
						points to a relative location then you need to make sure you keep
						pointing to the right place.  If the symlink is absolute however
						then you can leave it untouched.  But what if the symlink points to
						a file that you’re moving as part of your batch move operation? Now
						you need to handle that too.
					}
				}
			}

			h2 #mapping {-Name Mapping with @code{-mmv}}

			p {-
				What is @code{-mmv}?  It’s the solution to all your problems, that’s
				what it is!  @code{-mmv} takes as its argument(s) a utility and that
				utility’s arguments and uses that to create a mapping between old and
				new filenames — similar to the @code{-map()} function found in many
				programming languages.  I think to best convey how the tool functions, I
				should provide an example.  Let’s try to do the same thing we did
				previously where we tried to turn camelCase files to snake_case, but
				using @code{-mmv}:
			}

			figure {
				pre {= m4_fmt_code(camel-to-snake-smart.sh.gsp) }
			}

			p {-Let me break down how this works.}

			p {-
				@code{-mmv} starts by reading a series of filenames separated by
				newlines from the standard input.  Yes, sometimes filenames have
				newlines in them and yes there is a way to handle them but I shall get
				to that later.  The filenames that @code{-mmv} reads from the standard
				input will be referred to as the @em{-input files}.  Once all the input
				files have been read, the utility specified by the arguments is spawned;
				in this case that would be @code{-sed} with the argument
				@code{-'s/[A-Z]/‌\\L_&/g'}. The input files are then piped into
				@code{-sed} the exact same way that they would have been if we ran the
				above commands without @code{-mmv}, and the output of @code{-sed} then
				forms what will be referred to as the @em{-output files}.  Once a
				complete list of output files is accumulated, each input file gets
				renamed to its corresponding output file.
			}

			p {-
				Let’s look at a simpler example.  Say we want to rename 2 files in the
				current directory to use lowercase letters, we could use the following
				command:
			}
			
			figure {
				pre {= m4_fmt_code(mmv-tr.sh.gsp) }
			}

			p {-
				In the above example @code{-mmv} reads 2 lines from standard input,
				those being @em{-LICENSE} and @em{-README}.  Those are our 2 input files
				now. The @code{-tr} utility is then spawned and the input files are
				piped into it.  We can simulate this in the shell:
			}

			figure {
				pre {= m4_fmt_code(tr.sh.gsp) }
			}

			p {-
				As you can see above, @code{-tr} has produced 2 lines of output; these
				are our 2 output files.  Since we now have our 2 input files and 2
				output files, @code{-mmv} can go ahead and rename the files.  In this
				case it will rename @em{-LICENSE} to @em{-license} and @em{-README} to
				@em{-readme}.  For some examples, check the @a href="#examples"
				{-examples} section of this page down below.
			}

			h2 #newlines {-Filenames with Embedded Newlines}

			p {-
				People are retarded, and as a result we have filenames with newlines in
				them.  All it would have taken to solve this issue for everyone was for
				literally @strong{-anybody} during the early UNIX days to go ‘@em{-hey,
				this is a bad idea!}’, but alas, we must deal with this.  Newlines are
				of course not the only special characters filenames can contain, but
				they are the single most infuriating to deal with; the UNIX utilities
				all being line-oriented really doesn’t work well with these files.
			}

			p {-
				So how does @code{-mmv} deal with special characters, and newlines in
				particular?  Well it does so by providing the user with the @code{--0}
				and @code{--e} flags:
			}

			dl {
				dt { code{--0} }
				dd {
					p {-
						Tell @code{-mmv} to expect its input to not be separated by newlines
						(‘@code{-\\n}’), but by NUL bytes (‘@code{-\\0}’).  NUL bytes are
						the only characters not allowed in filenames besides forward
						slashes, so they are an obvious choice for an alternative separator.
					}
				}
				dt { code{--e} }
				dd {
					p {-
						Encode newlines in filenames before passing them to the provided
						utility.  Newline characters are replaced by the literal string
						‘@code{-\\n}’ and backslashes by the literal string ‘@code{-\\\\}’.
						After processing, the resulting output is decoded again.
					}
					p {-
						If combined with the @code{--0} flag, then while input will be read
						assuming a NUL-byte input-separator, the encoded input files will be
						written to the spawned process newline-separated.
					}
				}
			}

			h3 id="0-flag" {-The Simple Case}

			p {-
				In order to better understand these flags and how they work let’s go
				through another example.  We have 2 files — one with and one without an
				embedded newline — and our goal is to simply reverse these filenames.
				In this example I am going to be displaying newlines in filenames with
				the ‘@code{-$'\\n'}’ syntax as this is how my shell displays embedded
				newlines.
			}

			p {-
				We can start by just trying to naïvely pass these 2 files to @code{-mmv}
				and use @code{-rev} to reverse the names, but this doesn’t work:
			}

			figure {
				pre {= m4_fmt_code(mmv-rev.sh.gsp) }
			}

			p {-
				The reason this doesn’t work is that, due to the line-oriented nature
				of @code{-ls} and @code{-rev}, we are actually trying to rename the
				files @em{-foo}, @em{-bar}, and @em{-baz} to the new filenames
				@em{-zab}, @em{-rab}, and @em{-oof}.  As can be seen in the following
				diagram, the embedded newline is causing our input to be ambiguous and
				@code{-mmv} can’t reliably proceed anymore @x-ref{-1}:
			}

			figure {
				object data="conflict.svg" type="image/svg+xml" {-}
			}

			aside {
				p data-ref="1" {-
					The reason you get a cryptic ‘file not found’ error message is because
					@code{-mmv} tries to assert that all the input files actually exist
					before doing anything.  Since ‘foo’ isn’t a real file, we error out.
				}
			}
			
			p {-
				The first thing we need to do in order to proceed is to pass the
				@code{--0} flag to @code{-mmv}.  This will tell @code{-mmv} that we want
				to use the NUL-byte as our input separator and not the newline.  We also
				need @code{-ls} to actually provide us with the filenames delimited by
				NUL-bytes. Luckily m4_abbr(GNU) @code{-ls} gives us the @code{---zero}
				flag to do just that:
			}

			figure {
			  pre {= m4_fmt_code(mmv-rev-zero.sh.gsp) }
			}

			p {-
				So we’re getting places, but we aren’t quite there yet.  The issue we’re
				getting now is that @code{-mmv} received 2 input files from the standard
				input, but @code{-rev} produced 3 output files.  Why is that?  Well
				let’s try our hand at a little bit of command-line debugging with
				@code{-sed}:
			}

			figure {
				pre {= m4_fmt_code(sed-debugging.sh.gsp) }
			}

			p {-
				If you aren’t quite sure what the above is doing, here’s a quick
				summary:
			}

			ul {
				li {-
					The @code{--U} flag given to @code{-ls} tells it not to sort our
					output.  This is purely just to keep this example clear to the reader.
				}
				li {-
					The @code{--n} flag given to @code{-sed} tells it not to print the
					input line automatically at the end of the provided script.
				}
				li {-
					The @code{-l} command in @code{-sed} prints the current input in a
					‘visually unambiguous form’.
				}
			}

			p {-
				In the @code{-sed} output, we can see that @samp{-$} represents the end
				of a line, and @samp{-\\000} represents the NUL-byte.  All looks good
				here, we have two inputs separated by NUL-bytes.  Now let’s try to throw
				in @code{-rev}:
			}

			figure {
				pre {= m4_fmt_code(sed-debugging-rev.sh.gsp) }
			}

			p {-
				Well wouldn’t you know it?  Since @code{-rev} @em{-also} works with
				newline-separated input, it reversed our NUL-byte separators and now
				gives us 3 outputs.  Luckily the folks over at @em{-util-linux} provided
				us with the @code{--0} flag here too, so that we can properly handle
				NUL-delimited input. Combining all of this together we get a final
				working product:
			}

			figure {
				pre {= m4_fmt_code(reverse-embedded-newline.sh.gsp) }
			}

			h3 #e-flag {-Encoding Newlines}

			p {-
				Sometimes we want to rename a bunch of files, but the command we want to
				use doesn’t support NUL-bytes as nicely as we would like.  In these
				cases, you may want to consider encoding your newline characters into
				the literal string ‘@code{-\\n}’ and then passing your input
				newline-separated to your given command with the @code{--e} flag.
			}

			p {-
				For a real-world example, perhaps you want to edit some filenames in
				vim, or whatever other editor you use.  Well we can do this incredibly
				easily with the @code{-vipe} utility from the
				@a href="https://joeyh.name/code/moreutils/" {-moreutils}
				collection.  The @code{-vipe} command simply reads input from the
				standard input, opens it up in your editor, and then prints the
				resulting output to the standard output; perfect for @code{-mmv}!  We do
				not really want to deal with NUL-bytes in our text-editor though, so
				let’s just encode our newlines:
			}

			figure {
				pre {= m4_fmt_code(vipe.sh.gsp) }
			}

			aside {
				p {-
					Notice how you still need to pass the @code{--0} flag to let
					@code{-mmv} know that our input files may have embedded newlines.
				}
			}

			p {-
				When running the above code example, you will see the following in your
				editor:
			}

			figure {
				pre {= m4_fmt_code(vim.gsp) }
			}

			p {-
				After you exit your editor, @code{-mmv} will decode all occurrences of
				‘@code{-\\n}’ back into a newline, and all occurrences of ‘@code{-\\\\}’
				back into a backslash:
			}

			figure {
				object data="e-flag.svg" type="image/svg+xml" {-}
			}

			h2 #i-flag {-Individual Execution}
			p {-
				The previous examples are great and all, but what do you do if your
				mapping command doesn’t have the concept of an input separator at all?
				This is where the @code{--i} flag comes into play.  With the @code{--i}
				flag we can get @code{-mmv} to execute our mapping command for every
				input filename.  This means that as long as we can work with a complete
				buffer, we don’t need to worry about separators.
			}

			p {-
				To be honest, I cannot really think of any situation where you might
				actually need to do this.  If you can think of one, please @a
				href="mailto:mail@thomasvoss.com" {-email me} and I’ll update the
				example on this page.  Regardless, let’s imagine that we wanted to
				rename some files so that their filenames are replaced with their
				filename’s
				@a
					href="https://en.wikipedia.org/wiki/SHA-1"
					target="_blank"
				{-m4_abbr(SHA)-1 hash}.
				On Linux we have the @code{-sha1sum} program which reads input from the
				standard input and outputs the m4_abbr(SHA)-1 hash.  This is how we
				would use it with @code{-mmv}:
			}

			figure {
				pre {= m4_fmt_code(sha1sum-long-example.sh.gsp) }
			}

			p {-
				Another approach is to invoke @code{-mmv} twice:
			}

			figure {
				pre {= m4_fmt_code(sha1sum-short-example.sh.gsp) }
			}

			p {-
				If you are confused about why we need to make a call to @code{-awk},
				it’s because the @code{-sha1sum} program outputs 2 columns of data.  The
				first column is our hash and the second column is the filename where the
				to-be-hashed data was read from.  We don’t want the second column.
			}

			p {-
				Unlike in previous examples where one process was spawned to map all our
				filenames, with the @code{--i} flag we are spawning a new instance for
				each filename.  If you struggle to visualize this, perhaps the following
				diagrams help:
			}

			figure {
				figcaption {-Invoking @code{-mmv} without @code{--i}}
				object data="without-i-flag.svg" type="image/svg+xml" {-}
			}

			figure {
				figcaption {-Invoking @code{-mmv} with @code{--i}}
				object data="with-i-flag.svg" type="image/svg+xml" {-}
			}

			h2 #safety {-Safety}
			p {-
				When compared to the standard @code{-for f in *; do mv $f …; done} or
				@code{-ls | … | xargs -L2 mv} constructs, @code{-mmv} is significantly
				safer to use. These are some of the safety features that are built
				into the tool:
			}

			ol {
				li {-
					If the number of input- and output files differs, execution is aborted
					before making any changes.
				}
				li {-
					If an input file is renamed to the name of another input file, the
					second input file is not lost (i.e. you can rename @em{-a} to @em{-b}
					and @em{-b} to @em{-a} with no problem).
				}
				li {-
					All input files must be unique and all output files must be unique.
					Otherwise execution is aborted before making any changes.
				}
				li {-
					In the case that something goes wrong during execution (perhaps you
					tried to move a file to a non-existent directory, or a syscall
					failed), a backup of your input files is saved automatically by
					@code{-mmv} for recovery.
				}
			}

			p {-
				Due to the way @code{-mmv} handles #2, when things do go wrong you may
				find that all of your input files have disappeared.  Don’t worry though,
				@code{-mmv} takes a backup of your files before doing anything.  If you
				run @code{-mmv} with the @code{--v} option for verbose output, you’ll
				notice it backing up your stuff in the @code{-$XDG_CACHE_HOME} directory:
			}

			figure {
				pre {= m4_fmt_code(mmv-verbose.sh.gsp) }
			}

			p {-
				Upon successful execution the @code{-$XDG_CACHE_HOME/mmv/TIMESTAMP}
				directory will be automatically removed, but it remains when things go
				wrong so that you can recover any missing data.  The names of the
				backup-subdirectories in the @code{-$XDG_CACHE_HOME/mmv} directory are
				timestamps of when the directories were created. This should make it
				easier for you to figure out which directory you need to recover if you
				happen to have multiple of these.
			}
			
			h2 #examples {-Examples}

			aside {
				p {-
					All of these examples are ripped straight from the @code{-mmv(1)}
					manual page. If you installed @code{-mmv} through a package manager or
					via @code{-make install} then you should have the manual installed on
					your system.
				}
			}

			p {-Swap the files @em{-foo} and @em{-bar}:}
			figure {
				pre {= m4_fmt_code(examples/swap.sh.gsp) }
			}

			p {-
				Rename all files in the current directory to use hyphens (‘-’) instead
				of spaces:
			}
			figure {
				pre {= m4_fmt_code(examples/hyphens.sh.gsp) }
			}

			p {-
				Rename a given list of movies to use lowercase letters and hyphens
				instead of uppercase letters and spaces, and number them so that they’re
				properly ordered in globs (e.g. rename @em{-The Return of the King.mp4}
				to @em{-02-the-return-of-the-king.mp4}):
			}
			figure {
				pre {= m4_fmt_code(examples/number.sh.gsp) }
			}

			p {-
				Rename files interactively in your editor while encoding newlines into
				the literal string ‘@code{-\\n}’, making use of
				@code {
					a
						href="https://linux.die.net/man/1/vipe"
						target="_blank"
					{-vipe(1)}
				}
				from @em{-moreutils}:
			}
			figure {
				pre {= m4_fmt_code(examples/vipe.sh.gsp) }
			}

			p {-
				Rename all C source code- and header files in a git repository
				to use snake_case instead of camelCase using
				the m4_abbr(GNU)
				@code {
					a
						href="https://www.man7.org/linux/man-pages/man1/sed.1.html"
						target="_blank"
					{-sed(1)}
				}
				‘@code{-\\L}’ extension:
			}
			figure {
				pre {= m4_fmt_code(examples/camel-to-snake.sh.gsp) }
			}

			p {-
				Lowercase all filenames within a directory hierarchy which may contain
				newline characters:
			}
			figure {
				pre {= m4_fmt_code(examples/lowercase.sh.gsp) }
			}

			p {-
				Map filenames which may contain newlines in the current directory with
				the command ‘@code{-cmd}’, which itself does not support NUL-byte
				separated entries.  This only works assuming your mapping doesn’t
				require any context outside of the given input filename (for example,
				you would not be able to number your files as this requires knowledge of
				the input file’s position in the input list):
			}
			figure {
				pre {= m4_fmt_code(examples/i-flag.sh.gsp) }
			}
		}

		hr{}

		footer { m4_footer }
	}
}