#! /usr/local/bin/perl -w use strict; use lib '/home/litherm/lib/perl5/site_perl/'; use POSIX qw/floor/; use Text::Diff; use Text::Diff::Table; use Tk; use Tk::DialogBox; use Tk::FileSelect; use Tk::ROText; use Tk::Toplevel; # These are segments to ignore in the diffs. If you include an MSH segment # the number here should be one lower than normal. i.e. if you want to ignore # MSH-9 this should include an entry like '"MSH-8" => 1' - remember that's # for MSH ONLY. Also remember to put a comma after it unless it's the last # item in the list. use constant IGNORE => { "MSH-6" => 1, "MSH-9" => 1, "OBR-22" => 1 }; # Define some styles for consistant appearance between the screen and # the html dump. use constant STYLES => { header => { background => 'dimgrey', foreground => 'white' }, segment => { background => 'gainsboro', foreground => 'black' }, diff => { background => 'turquoise', foreground => 'black' }, removed => { background => 'salmon', foreground => 'black' }, added => { background => 'lightblue', foreground => 'black' }, normal => { background => 'white', foreground => 'black' }, 'warn' => { background => 'yellow', foreground => 'black' }, segdiff => { background => 'red', foreground => 'white' } }; my $w = {}; my $cfg = {}; &init_files($cfg, @ARGV); gen_tk($w, $cfg); MainLoop; exit; sub gen_tk { my $w = shift; my $cfg = shift; # Major window components $w->{main} = MainWindow->new(-title => 'HL7 Diff'); $w->{mframe} = $w->{main}->Frame->pack(-fill => 'x'); $w->{dframe} = $w->{main}->Frame->pack(-fill => 'both', -expand => 1); # Menu bar components $w->{mframe}->Label(-text => '1)')->pack(-side => 'left'); $w->{infile} = $w->{mframe}->Entry( -textvariable => \$cfg->{infile}, -background => 'white' )->pack(-side => 'left'); $w->{mframe}->Button( -text => 'Browse', -command => [ \&browse, $w, $cfg, 'infile' ] )->pack(-side => 'left'); $w->{mframe}->Label(-text => '2)')->pack(-side => 'left'); $w->{outfile} = $w->{mframe}->Entry( -textvariable => \$cfg->{outfile}, -background => 'white' )->pack(-side => 'left'); $w->{mframe}->Button( -text => 'Browse', -command => [ \&browse, $w, $cfg, 'outfile' ] )->pack(-side => 'left'); $w->{godiff} = $w->{mframe}->Button( -text => 'Diff!', -command => [ \&diff_records, $w, $cfg ] )->pack(-side => 'right'); $w->{godump} = $w->{mframe}->Button( -text => 'Save', -command => [ \&dump_text, $w, $cfg ], -state => 'disabled' )->pack(-side => 'right'); # Text window for displaying $w->{text} = $w->{dframe}->Scrolled( 'ROText', -width => 80, -height => 40, -scrollbars => 'osoe', -background => 'white', -wrap => 'none' )->pack(-fill => 'both', -expand => 1); # Configure some tags for highlighting text in window for (keys %{&STYLES}) { $w->{text}->tagConfigure( $_, -background => &STYLES->{$_}->{background}, -foreground => &STYLES->{$_}->{foreground} ); } } sub browse { my $w = shift; my $cfg = shift; my $dir = shift; my $dialog = $w->{main}->FileSelect(-title => 'Enter a file to diff:'); $cfg->{$dir} = $dialog->Show; $w->{main}->update; } sub dump_text { my $w = shift; my $cfg = shift; my $dialog = $w->{main}->FileSelect(-title => 'Enter a new file to save this data:'); my $file = $dialog->Show; return unless $file; if (-e $file) { $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ["OK", "Cancel"]); $dialog->Label(-text => "File exists - replace?")->pack; my $popup = $dialog->Show; return unless $popup eq "OK"; if (!-w $file) { $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ["OK"]); $dialog->Label(-text => "Can't write file, sorry.")->pack; my $popup = $dialog->Show; return; } } open FILE, ">$file" || die "Could not open $file : $!\n"; select FILE or die "Select failed one: $!\n"; print "HL7 Diff Report
\n";
	$w->{text}->dump('-all', -command => \&dump_parse, '1.0', 'end');
	print "
\n"; select STDOUT or die "Select failed two: $!\n"; close FILE || die "Could not close $file : $!\n"; } sub dump_parse { my $key = shift; my $value = shift; my $index = shift; if ($key eq "tagon") { print "{$value}->{foreground}; print "; background-color:".&STYLES->{$value}->{background}; print "\">"; } elsif ($key eq "tagoff") { print ""; } elsif ($key eq "text") { print $value; } } sub diff_records { my $w = shift; my $cfg = shift; &test_files($w, $cfg) || return; $w->{text}->delete('1.0', 'end'); $cfg->{counter} = 0; $w->{dialog} = $w->{main}->Toplevel(-title => 'Difference Engine'); $w->{diffstage} = $w->{dialog}->Label(-text => 'Stage one diff')->pack; my $tempText = "Stage one is the line by line diff.\n"; $tempText .= "I have no way of predicting how long\n"; $tempText .= "this will take, nor how much memory.\n"; $w->{diffdesc} = $w->{dialog}->Label(-text => $tempText)->pack; $w->{diffstat} = $w->{dialog}->Label(-text => "???")->pack; $w->{difftime} = $w->{dialog}->Label(-text => "00:00:00")->pack; $w->{dialog}->update; $w->{dialog}->repeat(1000, [ \&counter, $w, $cfg ]); my $diffTable = diff $cfg->{infile}, $cfg->{outfile}, {STYLE => 'Table', CONTEXT => 0}; $w->{diffstage}->configure(-text => 'Stage two diff'); $tempText = "Stage two is the field by field diff.\n"; $tempText .= "Unless there are many lines with lots\n"; $tempText .= "of differences, this should not take as\n"; $tempText .= "the first diff did."; $w->{diffdesc}->configure(-text => $tempText); $w->{dialog}->update; $cfg->{x} = 1; $cfg->{y} = 0; my $counter = 0; my $type = ''; my @size = (0, 0, 0, 0); for (split /\n/, $diffTable) { $counter++; $w->{diffstat}->configure(-text => sprintf("Line %06d", $counter)); # Need to figure out the format of the output table. It should either be: # three column: +---+------------------+---------------------+ # or # four column: +---+----------------+---+-------------------+ my ($line1, $line2, $rec1, $rec2, $sep1, $sep2); if ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/) { $type = 'three'; $size[0] = length $1; $size[1] = length $2; $size[2] = length $3; } elsif ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/) { $type = 'four'; $size[0] = length $1; $size[1] = length $2; $size[2] = length $3; $size[3] = length $4; } elsif ($type eq 'three') { $sep1 = substr $_, 0, 1; $line1 = substr $_, 1, $size[0]; $rec1 = substr $_, 2 + $size[0], $size[1]; $rec2 = substr $_, 3 + $size[0] + $size[1], $size[2]; $rec1 =~ s/\s+$//; $rec2 =~ s/\s+$//; if ($line1 =~ /\d+/ && $sep1 eq '*') { &diff_segments($w, $cfg, $line1, $rec1, $line1, $rec2); } } elsif ($type eq 'four') { $sep1 = substr $_, 0, 1; $line1 = substr $_, 1, $size[0]; $rec1 = substr $_, 2 + $size[0], $size[1]; $sep2 = substr $_, 2 + $size[0] + $size[1], 1; $line2 = substr $_, 3 + $size[0] + $size[1], $size[2]; $rec2 = substr $_, 4 + $size[0] + $size[1] + $size[2], $size[3]; $rec1 =~ s/\s+$//; $rec2 =~ s/\s+$//; if (($line1 =~ /\d+/ || $line2 =~ /\d+/) && ($sep1 eq '*' || $sep2 eq '*')) { &diff_segments($w, $cfg, $line1, $rec1, $line2, $rec2); } } else { die "Can not parse output of Text::Diff::Table"; } } $w->{dialog}->destroy; $w->{godump}->configure(-state => 'normal'); $w->{main}->update; } sub diff_segments { my $w = shift; my $cfg = shift; my ($line1, $rec1, $line2, $rec2) = @_; my $split1 = length($rec1) > 2 ? substr($rec1, 3, 1) : ''; my $split2 = length($rec2) > 2 ? substr($rec2, 3, 1) : ''; my $seglist1 = ''; my $seglist2 = ''; my $maxseg1 = 0; my $maxseg2 = 0; $cfg->{record} = $cfg->{x}; delete $cfg->{diffs}; $w->{text}->insert( "$cfg->{x}.0", "File 1, Record $line1 : File 2, Record $line2\n", "header" ); $cfg->{x}++; # Figure out what segments we need and the order. for (split /\\r/, $rec1) { $seglist1 .= "$_\n"; $maxseg1++; } for (split /\\r/, $rec2) { $seglist2 .= "$_\n"; $maxseg2++; } if (!$rec1) { $w->{text}->insert("$cfg->{x}.0", "File 1 does not contain this record.\n"); $cfg->{x}++; $w->{text}->insert("$cfg->{x}.0", "Record appears as follows in file 2.\n", "added"); $cfg->{x}++; $w->{text}->insert("$cfg->{x}.0", $seglist2); $cfg->{x} += $maxseg2; return; } elsif (!$rec2) { $w->{text}->insert("$cfg->{x}.0", "File 2 does not contain this record.\n"); $cfg->{x}++; $w->{text}->insert("$cfg->{x}.0", "Record appears as follows in file 1.\n", "removed"); $cfg->{x}++; $w->{text}->insert("$cfg->{x}.0", $seglist1); $cfg->{x} += $maxseg1; return; } my $diffTable = diff \$seglist1, \$seglist2, {STYLE => 'Table', CONTEXT => ($maxseg1 > $maxseg2) ? $maxseg1 : $maxseg2}; my $type = ''; my @size = (0, 0, 0, 0); for (split /\n/, $diffTable) { # Need to figure out the format of the output table. It should either be: # three column: +---+------------------+---------------------+ # or # four column: +---+----------------+---+-------------------+ my ($seg1, $seg2); if ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/) { $type = 'three'; $size[0] = length $1; $size[1] = length $2; $size[2] = length $3; } elsif ($_ =~ /^[*+|](-+)[*+|](-+)[*+|](-+)[*+|](-+)[*+|]$/) { $type = 'four'; $size[0] = length $1; $size[1] = length $2; $size[2] = length $3; $size[3] = length $4; } elsif ($type eq 'three') { $seg1 = substr $_, 2 + $size[0], $size[1]; $seg2 = substr $_, 3 + $size[0] + $size[1], $size[2]; $seg1 =~ s/\s+$//; $seg2 =~ s/\s+$//; &parse_fields($w, $cfg, $seg1, $seg2); } elsif ($type eq 'four') { $seg1 = substr $_, 2 + $size[0], $size[1]; $seg2 = substr $_, 4 + $size[0] + $size[1] + $size[2], $size[3]; $seg1 =~ s/\s+$//; $seg2 =~ s/\s+$//; &parse_fields($w, $cfg, $seg1, $seg2); } else { die "Can not parse output of Text::Diff::Table"; } } # Check here to see if we need to keep this record for (@{$cfg->{diffs}}) { if (!&IGNORE->{$_}) { return; } } # If we are still here, the only difference where in the IGNORE hash and the # diff should be deleted. $w->{text}->delete("$cfg->{record}.0", 'end'); $w->{text}->insert('end', "\n"); $cfg->{x} = $cfg->{record} + 1; $cfg->{y} = 0; } sub parse_fields { my $w = shift; my $cfg = shift; my ($seg1, $seg2) = @_; my @seg1 = split /\|/, $seg1; my @seg2 = split /\|/, $seg2; $seg1[0] = "Missing segment detected" unless $seg1[0]; $seg2[0] = "Missing segment detected" unless $seg2[0]; $seg1[0] = "Empty segment detected (\\r\\r)" if $seg1[0] eq "\\n"; $seg2[0] = "Empty segment detected (\\r\\r)" if $seg2[0] eq "\\n"; my $maxseg = (scalar @seg1 > scalar @seg2) ? scalar @seg1 : scalar @seg2; $cfg->{y} = 0; if (($seg1[0] ne $seg2[0]) || (length $seg1[0] > 3) || (length $seg2[0] > 3)) { $w->{text}->insert("$cfg->{x}.$cfg->{y}", "Segment mis-match detected!\n", 'warn'); $cfg->{x}++; push @{$cfg->{diffs}}, "SEGMENT"; } $w->{text}->insert("$cfg->{x}.$cfg->{y}", " \n1: \n2: \n"); $cfg->{x}++; $cfg->{y} = 3; $w->{text}->insert("$cfg->{x}.$cfg->{y}", $seg1[0]); $cfg->{x}++; $w->{text}->insert("$cfg->{x}.$cfg->{y}", $seg2[0]); $cfg->{x} -= 2; $cfg->{y} = 6; my $test = 0; for (my $i = 1; $i < $maxseg; $i++) { $seg1[$i] = "" unless $seg1[$i]; $seg2[$i] = "" unless $seg2[$i]; my $color1 = 'normal'; my $color2 = 'normal'; if ($seg1[$i] ne "" && $seg2[$i] eq "") { $test++; $color1 = 'removed'; push @{$cfg->{diffs}}, "REMOVED"; } elsif ($seg1[$i] eq "" && $seg2[$i] ne "") { $test++; $color2 = 'added'; push @{$cfg->{diffs}}, "ADDED"; } elsif ($seg1[$i] ne $seg2[$i]) { $test++; $color1 = 'diff'; $color2 = 'diff'; push @{$cfg->{diffs}}, "$seg1[0]-$i"; } my $maxy = length $i; $maxy = ($maxy > length $seg1[$i]) ? $maxy : length $seg1[$i]; $maxy = ($maxy > length $seg2[$i]) ? $maxy : length $seg2[$i]; my $temp = $i; $temp++ if $seg1[0] eq "MSH"; my $count = $temp . " " x ($maxy - length $temp); $seg1[$i] = $seg1[$i] . " " x ($maxy - length $seg1[$i]); $seg2[$i] = $seg2[$i] . " " x ($maxy - length $seg2[$i]); $w->{text}->insert("$cfg->{x}.$cfg->{y}", "|$count", 'segment'); $cfg->{x}++; $w->{text}->insert("$cfg->{x}.$cfg->{y}", "|$seg1[$i]", $color1) unless length $seg1[0] > 3; $cfg->{x}++; $w->{text}->insert("$cfg->{x}.$cfg->{y}", "|$seg2[$i]", $color2) unless length $seg2[0] > 3; $cfg->{x} -= 2; $cfg->{y} += $maxy + 1; } if ($test) { $w->{text}->delete("$cfg->{x}.0", "$cfg->{x}.4"); $w->{text}->insert("$cfg->{x}.0", "--->", "segdiff"); } $cfg->{x} += 3; } sub test_files { my $w = shift; my $cfg = shift; if (!$cfg->{infile} || !$cfg->{outfile}) { my $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ['OK']); $dialog->Label(-text => "Please select both a\nfile 1 and a file 2.")->pack; $dialog->Show; return 0; } if (!-r $cfg->{infile}) { my $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ['OK']); $dialog->Label(-text => "Could not open file:\n$cfg->{infile}\n$!")->pack; $dialog->Show; return 0; } if (!-r $cfg->{outfile}) { my $dialog = $w->{main}->DialogBox(-title => 'Error', -buttons => ['OK']); $dialog->Label(-text => "Could not open file:\n$cfg->{outfile}\n$!")->pack; $dialog->Show; return 0; } return 1; } sub counter { my $w = shift; my $cfg = shift; $cfg->{counter}++; my $sec = $cfg->{counter}; my ($hour, $min); $hour = floor($sec/3600); $sec -= $hour * 3600; $min = floor($sec/60); $sec -= $min * 60; $w->{difftime}->configure(-text => sprintf('%02d:%02d:%02d', $hour, $min, $sec)); $w->{dialog}->update; } sub init_files { my $cfg = shift; my @argv = @_; my $argv = "\t".join("\t", @argv)."\t"; if ($argv =~ /-h/) { print "usage: $0 \n\n"; print "The file arguments are optional.\n"; print "$0 -h ; Will display helpful information.\n"; exit; } $cfg->{infile} = $argv[0] || ""; $cfg->{outfile} = $argv[1] || ""; }