#! /usr/local/bin/perl # This program parses the keysym definitions from a X11 source distribution # and one or more XCompose specifications files (XCompose files mentioned # later on the command line will override settings from those mentioned # earlier). It then produces the same bindings (more or less) in the format # suitable for a personal DefaultKeyBinding.dict file. use strict; use warnings; use autodie; use lib q(/Users/itz/perl5/lib/perl5); use feature qw(unicode_strings state); use Getopt::Long qw(:config require_order no_bundling); use Tree::Trie; use File::Slurp qw(write_file); $main::keysymdef = qq($ENV{'HOME'}/.local/share/X11/keysymdef.h); $main::dumpsyms = 0; $main::dumpevents = 0; $main::multikey = 0xf70c; # F9 key as mapped by Apple $main::output = ''; # Note: I ignore the keysyms numbers proper and instead I grab the Unicode # point from the comments later on the line. The two coincide *only* in # the Latin-1 range, and the Unicode data is what MacOS in fact expects # in keybindings. If others had success with grabbing the keysym numbers # it was by sheer luck. sub parse_keysymdef { our $keysymdef; open(my $fh, '<', $keysymdef); my (%keysyms, $kw, $xk, $kn, $cs, $uc); while (<$fh>) { ($kw, $xk, $kn, $cs, $uc) = split; next unless defined $uc && $kw eq '#define' && $kn =~ m(^0x[0-9a-f]*$)i && $cs eq '/*'; next unless $xk =~ m(^XK_(\S+)$); my $name = $1; next unless $uc =~ m(^U[+]([0-9a-f]{4})$)i; my $code = hex($1); $keysyms{$name} = $code; } $fh->close(); return \%keysyms; } sub dump_keysyms { my ($keysyms) = @_; while (my ($k, $v) = each %{$keysyms}) { $v = '0x' . sprintf('%04x', $v); print($k, ' ', $v, "\n"); } } # The check for dead keys in fact does nothing, as all bindings # which contain dead key events are eliminated by other conditions already. # But I include it anyway to be safe. sub decode_event { my ($keysyms, $e) = @_; return undef if $e =~ m(^dead_); my $rawcode = ( $e =~ m(^U([0-9a-f]{4})$)i ? hex($1) : undef ); return ${$keysyms}{$e} // $rawcode; } # By the time this is called, Multi_key is remapped to the pseudo-Unicode # of the desired compose keystroke. This happens in main(). sub parse_xcompose { my %eventmap; my ($keysyms) = @_; for my $xcompose (@ARGV) { open(my $fh, '< :encoding(UTF-8)', $xcompose); while (<$fh>) { my @fields = split; next unless @fields; next unless $fields[0] eq ''; my @events = ( ); push(@events, $1), shift(@fields) while @fields && $fields[0] =~ m(^<(.*)>$); next unless $#events; @events = map { decode_event($keysyms, $_) } @events; next if grep { not defined($_) } @events; next unless @fields && $fields[0] eq ':'; shift(@fields); # Note the dot is NOT quantified on the next line. # Some bindings in the X11 Compose file ARE multi-character # but that is WEIRD next unless @fields && $fields[0] =~ m(^"(.)"$); my $result = ($1 eq q(\\\\) ? q(\\) : $1); my $eventrep = join(' ', map { '0x' . sprintf('%04x', $_) } @events); $eventmap{$eventrep} = $result; } $fh->close(); } return \%eventmap; } sub dump_eventmap { my ($eventmap) = @_; while (my ($eventrep, $result) = each %{$eventmap}) { print($eventrep, ' : "', $result, '"', "\n"); } } sub build_trie { my ($eventmap) = @_; my $eventtrie = Tree::Trie->new; while (my ($eventrep, $result) = each %{$eventmap}) { my $eventstr = join('', map { chr(hex($_)) } (split(' ', $eventrep))); $eventtrie->add_data($eventstr => $result); } return $eventtrie; } sub wrap_result { my ($result) = @_; return ["insertText:", $result]; } sub tail { my ($subject) = @_; $subject =~ m(^.*(.)$) or die(qq(Boom! Tail of an empty string\n)); return $1; } sub build_multihash { our ($multikey); my ($eventtrie) = @_; my %multihash = (); my @que = (\%multihash, chr($multikey)); QUE: while (@que) { my ($thishash, $prefix) = (shift(@que), shift(@que)); my @suffixes = $eventtrie->lookup($prefix, 1); die(qq(Error: ambiguous prefix ${prefix}\n)) if ($#suffixes && grep { $_ eq '' } @suffixes); my $label = tail($prefix); if ($suffixes[0] eq '') { my $result = $eventtrie->lookup_data($prefix); $thishash->{$label} = wrap_result($result); next QUE; } my $newhash = { }; $thishash->{$label} = $newhash; push(@que, $newhash, $prefix . $_) for @suffixes; } return \%multihash; } # I tried two CPAN packages for the task of translating complex data # into the plist/dict format. Mac::PropertyList requires the inner parts # of a data structure to be already translated before joining them # on the outer level, which is a nonstarter. Data::PropertyList can # translate multilevel structures in one go, but insists on dumping # literal, unescaped string data. That may work (I have not checked) # but looks way too scary with all those weird non-Latin characters # in there. # So I was left with implementing this myself. Luckily for this application # the structure is very restricted (for example I know that arrayrefs # only contain strings) so I can take shortcuts. sub unicode_escape_char { my ($c) = @_; return "\\U" . uc (sprintf('%04x', ord($c))); } sub plist_escape_char { my ($c) = @_; return $c if ord($c) > 0x20 && ord($c) < 0x7f && $c !~ m([@~#"'^]) && $c ne '$' && $c ne '\\'; return unicode_escape_char($c); } # The parentheses around map are necessary, # otherwise the subsequent quote is taken as another datum for map sub plist_show_string { my ($s) = @_; state %cached; $cached{$s} = join('', '"', (map { plist_escape_char($_) } split('', $s)), '"' ) unless defined $cached{$s}; return $cached{$s}; } sub plist_show_arrayref { my ($a) = @_; return '(' . join(', ', map { plist_show_string($_) } @{$a}) . ')'; } sub plist_show_anyref { my ($r, $l) = @_; return plist_show_arrayref($r) if ref($r) =~ m(^ARRAY); return plist_show_hashref($r, $l) if ref($r) =~ m(^HASH); die(qq(Unexpected reference type\n")); } sub plist_show_binding { my ($k, $v, $l) = @_; return ( plist_show_string($k) . ' = ' . plist_show_anyref($v, $l + 2) . ';' ); } sub plist_show_hashref { my ($h, $l) = @_; my $sep = "\n" . (' ' x $l); return ( '{' . $sep . join($sep, map { plist_show_binding($_, $h->{$_}, $l) } sort(keys(%{$h}))) . "\n" . (' ' x ($l - 2)) . '}' ); } sub main { our ($keysymdef, $dumpsyms, $dumpevents, $multikey, $output); GetOptions( 'keysyms=s' => \$keysymdef, 'dumpsyms' => \$dumpsyms, 'dumpevents' => \$dumpevents, 'multikey=i' => \$multikey, 'output=s' => \$output ) or die(qq(Error in command line arguments\n)); my $keysyms = parse_keysymdef(); $keysyms->{'Multi_key'} = $multikey; dump_keysyms($keysyms), return 1 if $dumpsyms; die(qq(At least one input file must be specified\n)) unless @ARGV; my $eventmap = parse_xcompose($keysyms); dump_eventmap($eventmap), return 1 if $dumpevents; my $eventtrie = build_trie($eventmap); my $multihash = build_multihash($eventtrie); print(plist_show_hashref($multihash, 2)), return 1 unless $output; write_file($output, plist_show_hashref($multihash, 2)); return 1; } main(); __END__