#!/usr/bin/perl -- Fluffy's Twenty Lines From Hell programming@sarten-x.com use strict;use warnings;my (%names, $names, @names);open (NAMES, '<', $ARGV[0]) or die('Error opening names file: $!');while () {s~//.*~~;s:\s*$::;do{ $names{$1}=[$2, split(m/,\s*/,$3)];($names,@names)=($1,split(m{,\s*},$3)) if ( ord lc $2 == + ('1'x3) );push @_,$1;} if(m{\s*(\S+)\s+((?:d|c|o)\S+)\s+(.*)});} close (NAMES);my (%data, $data, @data) = map { $_, '0' } @names;#-ao(); merge() open (DATA, '<', $ARGV[1]) or die('Error opening data file: $!');while (){ s/^\s*|\s*$//g;@data = split(m+\s*,\s*+,$_);$data = { map { $_[$_]||'', do {if ( defined($_[$_]) && ord @{$names{$_[$_]}}[0] > 99) {@ARGV = @{@names{$_[$_]}}[1.. scalar @{@names{$_[$_]}}-1];$, = $data[$_];if (grep /^$,$/, @ARGV) {$data[$_]} else{print "Invalid data for ".$_[$_].": $, in line $..\n";}} elsif (defined($_[ $_]) && ord($names{ $_[$_]}[0])/33==3) {@ARGV = sort {$a <=> $b} @{@names{$_[$_] }}[1 .. scalar @{@names{$_[$_]}}-1]; if ($data[$_]>=$ARGV[0]&&$data[$_]<=$ARGV[1 ]) {$data[$_]}else{print "Invalid data for ".$_[$_]. ": ".$data[$_]." in line ". $..".\n";}}}} 0..scalar(@data)};$data{${$data}{$names}}++;}close (DATA);foreach $_(@_){ @ARGV = @{$names{$_}}[1 .. scalar @{$names{$_}}-1];print "$_: ".@{$names {$_}}[0]." ".((ord(@{$names{$_}}[0])/9 > 11)?'{'.join(', ',@ARGV).'}':'['.join( ', ', map {sprintf("%.1f", $_)} @ARGV).']')."\n"; } print "\n";foreach $, (keys %data) {print $data{$,}.' '.$,."\n";} __END__ Good evening and welcome to Fluffy's Twenty Lines From Hell. I wrote this program because I was informed that there were large programs being used to do basic parsing. As writing parsers is a specialty of mine, I took it upon myself to write a fairly small submission of my own. Here it is. By the way, it's also somewhat obfuscated, just to make things more fun. There are really only three obfuscation tricks used here. The first should be obvious. I've removed almost all flow from the program by removing the extra white space. Let's expand the program to a reasonable form: use strict; use warnings; my (%names, $names, @names); open (NAMES, '<', $ARGV[0]) or die('Error opening names file: $!'); while () { s~//.*~~; s:\s*$::; do{ $names{$1}=[$2, split(m/,\s*/,$3)]; ($names,@names)=($1,split(m{,\s*},$3)) if (ord lc $2 == + ('1'x3) ); push @_,$1; } if(m{\s*(\S+)\s+((?:d|c|o)\S+)\s+(.*)}); } close (NAMES); my (%data, $data, @data) = map { $_, '0' } @names; #-ao(); merge() open (DATA, '<', $ARGV[1]) or die('Error opening data file: $!'); while () { s/^\s*|\s*$//g; @data = split(m+\s*,\s*+,$_); $data = { map { $_[$_]||'', do { if (defined($_[$_]) && ord @{$names{$_[$_]}}[0] > 99) { @ARGV = @{@names{$_[$_]}}[1 .. scalar @{@names{$_[$_]}}-1]; $, = $data[$_]; if (grep /^$,$/, @ARGV) { $data[$_] } else { print "Invalid data for " . $_[$_] . ": $, in line $..\n"; } } elsif (defined($_[$_]) && ord($names{ $_[$_]}[0])/33==3) { @ARGV = sort {$a <=> $b} @{@names{$_[$_]}}[1 .. scalar @{@names{$_[$_]}}-1]; if ($data[$_] >= $ARGV[0] && $data[$_] <= $ARGV[1]) { $data[$_] } else { print "Invalid data for ". $_[$_] . ": " . $data[$_] . " in line " . $. . ".\n"; } } }} 0..scalar(@data)}; $data{${$data}{$names}}++; } close (DATA); foreach $_ (@_) { @ARGV = @{$names{$_}}[1 .. scalar @{$names{$_}}-1]; print "$_: " . @{$names{$_}}[0] . " " . ( (ord(@{$names{$_}}[0])/9 > 11) ? '{'.join(', ', @ARGV).'}' : '['.join(', ', map {sprintf("%.1f", $_)} @ARGV).']' ) . "\n"; } print "\n"; foreach $, (keys %data) { print $data{$,}.' '.$,."\n"; } __END__ That's not so bad, is it? There's still a lot of "line noise" in there, but that's fine. We'll start tracing the program. use strict; use warnings; This is considered good Perl practice. It requires strict adherence to the rules of Perl, and enables warnings to be given for various unkosher practices. The next obfuscation comes here: my (%names, $names, @names); I am a horrible person. Yes, those are three variables with the same name. I also use a fourth instance of "names" as a filehandle. To make things even more fun, Perl decides which variable to use based on how it's used, rather than what symbol is in front of the name. The symbol declares what kind of data you want the variable to return. For instance, you can ask (of an array) for either a scalar or a subset. Let's move on. open (NAMES, '<', $ARGV[0]) or die('Error opening names file: $!'); This line is very straightforward, but needs only two small explanations for those of you who don't know Perl. First, @ARGV is the array that holds the command line parameters. Second, Perl has almost no concept of "statements". In Perl, everything is a function, which may or may not have side effects. This means each line is evaluated as an expression, and the very-low-precedence "or" operator (which short-circuits) can be used to run alternative commands. In other words, this line opens the first parameter for reading as the filehandle "NAMES", and terminates the program if there's an error. while () { Perl, owing to its AWK and sed heritage has some default variables, that do not need declaration, and often don't need to be written to be used. This loop is one of those times. In simple terms, it will loop on each line read in from the filehandle NAMES, which we just opened. It will assign that line to $_. s~//.*~~; s:\s*$::; I am a horrible person. The s/// operation performs substitutions by regular expression. Here, however, I'm changing the delimiter to ~ and :, both to make the program a bit more obscure, and also to avoid having to escape the first substitution's slashes. These lines remove comments and trailing whitespace, respectively, by replacing the matching sections of the line with nothing. do{ ... } if(m{\s*(\S+)\s+((?:d|c|o)\S+)\s+(.*)}); Again, I am a horrible person. This is a basic IF statement, but reversed, because Perl can do that. It checks for valid lines in the names file, using a regular expression. The groups in parentheses represent the first word, second word, and everything after the whitespace following the second word. Those groups will be assigned to the default variables $1, $2, and $3. Remember, comments and trailing whitespace have already been removed. $names{$1}=[$2, split(m/,\s*/,$3)]; ($names,@names)=($1,split(m{,\s*},$3)) if (ord lc $2 == + ('1'x3) ); push @_,$1; Once a valid line has been found and its data extracted, we'll store it in a data structure. Here's where I start abusing the similarly-named variables. In the first line, I'm creating a hash table (called %names and keyed by the feature name) that contains arrays of the feature type, followed by the elements of the third field (possible values), separated by commas. In effect, it puts the whole data point's description into one slot in the hash table. The second line illustrates the third form of obfuscation I used: Replacing sane functions with ones that work differently. Here, a simple comparison of "if lowercase $2 is equal to 'output'" has become "if the number of the first character of lowercase $2 is equal to the number formed by repeating the string '1' 3 times". Now, for those who haven't memorized the ASCII table, a lowercase 'o' is number 111. How convenient. You might want to find that table now, since we'll use it again later. This line's function is decent to understand. It will store the feature name into $names, and the possible values into @names, if the feature type is "output". The third line is short, simple, and sweet. It adds the name of the feature to the default array @_. This way, we preserve the order of the features, which is rather important for parsing the data file. } close (NAMES); That's it for reading the names file. We close the file, and we're done. We should review what our data looks like. We have: * $names contains the name of the output feature * @names contains the possible values of the output feature * %names is keyed by feature name, and contains (in an array) the feature's type and possible values. * @_ contains a list of the feature names, in order. my (%data, $data, @data) = map { $_, '0' } @names; #-ao(); merge() Again, I'm using the same name for everything. I am a horrible person. I'm also abusing a feature of perl, which is where you can assign to a list. The way it works, though, only the first variable here (the %data hash table) will get an assignment. The hash table, which is expecting a list assignment, consumes the whole assigned list, and nothing is left for the other variables. For more details, see perldata. Note that comments in Perl start with the "#" symbol. The end of this line does nothing, except serve to confuse the reader. Of note is the map function. It takes an array (@names), runs it through a given routine (as $_), and returns an array with the results. As it's used here, it's turning @names into the keys for the %data hash table. The values will all be "0", making an ideal start for counting output results. Amazingly enough, this is what we'll use it for. Now, we can start on the data file. open (DATA, '<', $ARGV[1]) or die('Error opening data file: $!'); This is exactly like our previous file-opening experience, but there is an important event to note: We are now entirely done using the given command-line arguments. while () { s/^\s*|\s*$//g; @data = split(m+\s*,\s*+,$_); Again using the default variable $_, we first remove leading and trailing whitespace from the line, then split the text into comma-delimited fields, also removing any leading or trailing whitespace from the field. Those fields are stored in @data. $data = { map { $_[$_]||'', do { ... }} 0..scalar(@data)}; Another bit of ugliness here. At this point, I ran out of variables to use, and was faced with a choice: Either use a new variable name, or commit a nice atrocity. I chose the latter. $data actually holds a reference to a hash table. This means we'll have to access it like a reference. Remember that map takes a list, and runs it as $_ through a block of code, returning another list. The list we're applying here is a list of numbers from 1 to however long @data is. In other words, looping through each feature in the record. In the map block, we take the number in $_, look up its name in @_ (by using the weird syntax "$_[$_]"), and use that as a key for our hash table. Just in case something bad happens (like having more data than named features), we've thrown in "||''", which will return an empty string for the key if one can't be found. The right side of the comma is what will be our hash table's value. It will ideally be $data[$_], referring to the list of data points split just a bit ago. However, as one of the program's requirements is to validate input, and I don't want to make a nice clean function for it, we'll do that here. The do{} function executes a block of code, and returns the value of the last expression. Let's look at the validation code. if (defined($_[$_]) && ord @{$names{$_[$_]}}[0] > 99) { First, we start off a big IF block. First, there's a quick test to ensure a name for the current data point exists. Then, there's more use of ASCII tables. Looking at the program requirements, both the "discrete" and "output" feature types are the same kind of data, so we can use the same validation routine for both of them. Using a bit of obfuscation, we're checking to see if ASCII code of the first letter ("ord") of the feature type (element 0 from the array stored in the %names hash table) for the feature name ("$_[$_]") is greater than 99. Since the ASCII code for "c" is 99, continuous data will be the only type to fail this test. @ARGV = @{@names{$_[$_]}}[1 .. scalar @{@names{$_[$_]}}-1]; Now, we're validating only discrete (and output) features. We look into the %names table from earlier, and pull out the array. However, using yet another feature of Perl, we're getting only a slice (subset) of the stored array. We get everything after the first element (because the first element stored the type). This leaves us with an array of the feature's possible values, which we store in @ARGV. We can do this because, as noted above, we're done using the original command-line arguments. $, = $data[$_]; For convenience, and to avoid confusing the Perl compiler, we're assigning the current data point to another pre-defined variable, $,. if (grep /^$,$/, @ARGV) { $data[$_] } else { print "Invalid data for " . $_[$_] . ": $, in line $..\n"; } Finally, we can do our real validation, using a function similar to map. We use grep which, in this case, returns a list of elements from the given array for which the regular expression matched. Practically speaking, it returns a true value if and only if $, is in the list of possible values. Otherwise, it prints out an error message. } elsif (defined($_[$_]) && ord($names{ $_[$_]}[0])/33==3) { Now, we need to validate continuous data. Again we have the test that a name exists, and now we check that the first letter of the feature type is 99, but to avoid looking similar to the previous condition, we apply some basic math first. @ARGV = sort {$a <=> $b} @{@names{$_[$_]}}[1 .. scalar @{@names{$_[$_]}}-1]; This is very similar to our last assignment to @ARGV, but we're also sorting the elements. The purpose of this is just in case somebody writes the names file incorrectly, with the maximum and minimum values swapped. if ($data[$_] >= $ARGV[0] && $data[$_] <= $ARGV[1]) { $data[$_] } else { print "Invalid data for ". $_[$_] . ": " . $data[$_] . " in line " . $. . ".\n"; } This is your basic range comparison. Nothing to see here. That completes our validation routines, which if you'll remember, are inside a do{} block. This is why we have "$data[$_]" when a validation returns true. Having that expression evaluated means that's what the do{} will return, and that's the value that will go in our hash table stored as a reference in $data. All that's left is to use it. $data{${$data}{$names}}++; That's exactly what we do here. Now that the whole record has been parsed and validated, we refer back to $names to get the name of the output feature, go to that spot in the hash table referred to by $data (using the ${} dereferencing operation), then use that as a key for the %data hash table, which holds our total counts for output. We then increment that value. Whew. } close (DATA); Again, we close our loop and our file. Again, it's time to look at the state of our data: * $names contains the name of the output feature * @names contains the possible values of the output feature * %names is keyed by feature name, and contains (in an array) the feature's type and possible values. * @_ contains a list of the feature names, in order. * %data is keyed by value, and contains a count of each value's occurence as the output feature in a record * @data contains the fields from the last viewed record, in order. * $data contains a reference to a hash table, keyed by name, of the fields from the last viewed record. Now, all that's left is to display some results. foreach $_ (@_) { This is a simple loop that goes through @_, assigning each feature name in turn to $_. @ARGV = @{$names{$_}}[1 .. scalar @{$names{$_}}-1]; With the name, we get the original description from the %names hash table, and pull out the possible values. We store them in @ARGV, continuing the trend of abusing variables. print "$_: " . @{$names{$_}}[0] . " " . ( The first part here is simple. It prints out the name and the type of the feature. The open parenthesis at the end implies we're in for more trouble, though. (ord(@{$names{$_}}[0])/9 > 11) ? '{'.join(', ', @ARGV).'}' : '['.join(', ', map {sprintf("%.1f", $_)} @ARGV).']' Perhaps not so much trouble after all. It's a simple inline condition, that checks the type of feature we're displaying now. For discrete features, it just joins the possible values, and returns them inside curly braces. For continuous features, we first use map to apply a sprintf pattern to each element, giving us a nice fixed-point display as seen in the sample output. ) . "\n"; } print "\n"; We print out a few newlines, and we're ready to display our output results. foreach $, (keys %data) { print $data{$,}.' '.$,."\n"; } Again, this is pretty straightforward. We loop through the keys of %data (which holds our output counts), and print out the associated value and key pair as seen in the sample. __END__ The final line of the program is a special token, that stops Perl from attempting to read further. It's quite useful for adding documentation to the end of a file, such as this. That's it, then. In summary, this program has nothing terribly bad in it. I have broken every best practice I easily could, but there's no pointer abuse, no self-modifying code, no whitespace magic, and no backwards logic. It's a genuine best-effort program, with some tricks thrown in. It does not only this specific job, but the data structures are all set up to be used for other tasks in the future. The program is, in many ways, good. I, however, am a horrible person. -Fluffy Appendix: Since the initial writing of this document, I have abused this program even more. It is now Fluffy's Eighteen Lines From Hell. Interpretation is left as an exercise for the reader. #!/usr/bin/perl -- Fluffy's Eighteen Lines From Hell programming@sarten-x.com use strict;use warnings;my(%names,${names},@names);open(NAMES,"<$ARGV[0]")or(die 'Error'.$".'opening'.$".'names'.$".'file:'.$".'$!');while(){s~\s*(//.*)?$ ~x;do{$names{$1}=[$2,split(/,\s*/,$3)];($names,@names)=($1,split(m{,\s*},$3))if (ord(lc$2)==+('1'x3));push@_,$1}if(m{\s*(\S+)\s+([dco]\S+)\s+(.*)})};close(NAMES );my(%data,$data,@data)=map{$_,'0'}@names;open(DATA,"<$ARGV[1]")or(die'Error'.$" .'opening'.$".'data'.$".'file:'.$".'$!');while(){s!^\s*|\s*$!!g;@data=#@_= split(m+\s*,\s*+,$_);$data={map{$_[$_]||'',do{if(defined($_[$_])&&ord@{$names{$_ [$_]}}[0]>99){@ARGV=@{@names{$_[$_]}}[1..scalar@{@names{$_[$_]}}-1];$,=$data[$_] ;if(grep/^$,$/,@ARGV){$data[$_]}else{print'Invalid'.$".'data'.$".'for'.$".$_[$_] .':'.$".$,.$".'in'.$".'line'.$".$..".\n"}}elsif(defined($_[$_])&&ord($names{$_[$ _]}[0])/33==3){@ARGV=sort{$a<=>$b}@{@names{$_[$_]}}[1..scalar@{@names{$_[$_]}}-1 ];if($data[$_]>=$ARGV[0]&&$data[$_]<=$ARGV[1]){$data[$_]}else{print'Invalid'.$". 'data'.$".'for'.$".$_[$_].':'.$".$data[$_].$".'in'.$".'line'.$".$..".\n"}}}}0..# scalar(@data)};$data{${$data}{$names}}++}close(DATA);foreach(@_){@ARGV=@{$names{ $_}}[1..scalar@{$names{$_}}-1];print"$_:".$".@{$names{$_}}[0].$".((ord(@{$names{ $_}}[0])/9>11)?'{'.join(','.$",@ARGV).'}':'['.join(','.$",map{sprintf("%.1f",$_) }@ARGV).']')."\n"}print"\n";foreach$,(keys%data){print"$data{$,}\40$,\n"}__END__