· 6 years ago · Jan 15, 2020, 07:52 AM
1#!/usr/bin/perl
2use warnings;
3use strict;
4
5#Code validates, decodes, and encodes bitcoin addresses.
6#See examples at bottom.
7#Author: Len Schulwitz + friends at http://rosettacode.org/wiki/Bitcoin/address_validation
8#E-mail: My last name at gmail.com.
9#AS IS CODE! USE AT YOUR OWN RISK!
10
11#SHA-256 necessary for bitcoin address validation checksum check.
12use Digest::SHA qw(sha256);
13
14# The set of characters used in bech32 encoding.
15my @CHARSET = ('q','p','z','r','y','9','x','8','g','f','2','t','v','d','w','0','s','3','j','n','5','4','k','h','c','e','6','m','u','a','7','l');
16# These numbers are used in the bech32 polymod function.
17my @GENERATOR = (0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3);
18#The base58 characters used by Bitcoin.
19my @b58 = qw{
20 1 2 3 4 5 6 7 8 9
21 A B C D E F G H J K L M N P Q R S T U V W X Y Z
22 a b c d e f g h i j k m n o p q r s t u v w x y z
23};
24#Used to decode base58 encoded bitcoin addresses (i.e. standard bitcoin addresses).
25my %b58 = map { $b58[$_] => $_ } 0 .. 57;
26#The reverse hash, used to base58 encode addresses represented as binary decimals.
27my %reverseb58 = reverse %b58;
28
29#Encodes a base58 encoded bitcoin address from array of binary decimals.
30sub base58 {
31 my @binary_address_to_encode = @{$_[0]};
32 die "Subroutine base58 needs binary decimal array to encode!\n" unless @binary_address_to_encode;
33 #This adds slightly more processing than is necessary, but will ensure all bytes are encoded.
34 my $base58_encoded_array_size = 2 * scalar @binary_address_to_encode;
35 my @base58_encoded_address;
36 #Counts number of leading 0's in decimal address.
37 my $leading_zeroes = length $1 if join('', @binary_address_to_encode) =~ /^(0*)/;
38 #Cycle through each binary decimal character, encoding to Base58.
39 for my $dec_char ( @binary_address_to_encode ) {
40 #Cycle through each index (i.e. base58 encoded character) of array holding base58 encoded result.
41 for (my $encoded_character_index = $base58_encoded_array_size; $encoded_character_index--; ) {
42 #See Satoshi's base58.cpp code for details.
43 $dec_char += 256 * ($base58_encoded_address[$encoded_character_index] // 0);
44 $base58_encoded_address[$encoded_character_index] = $dec_char % 58;
45 $dec_char /= 58;
46 }
47 }
48 #Generate encoded address with extra leading ones
49 my $encoded_address_with_leading_1s = join('', map { $reverseb58{$_} } @base58_encoded_address);
50 #Truncate address so that the number of leading zero bytes in the binary address are equal to the number of leading ones in the base58 encoded address.
51 if ($encoded_address_with_leading_1s =~ /(1{$leading_zeroes}[^1].*)/){
52 #Return matching base58 encoded bitcoin address.
53 return $1;
54 }
55 #If encoding only zero bytes...
56 elsif ($encoded_address_with_leading_1s =~ /(1{$leading_zeroes})/){
57 return $1;
58 }
59 else{
60 die "Unexpected error in subroutine base58!\n";
61 }
62}
63
64#Decodes bitcoin address from its Base58 encoding into an array of binary decimals.
65sub unbase58 {
66 my $bitcoin_address = $_[0];
67 die "Subroutine unbase58 needs base58 encoded bitcoin address to decode!\n" unless defined $bitcoin_address;
68 print "Cannot Decode! Invalid Base58 Character(s)!\n" and exit unless $bitcoin_address =~ /^[1-9A-HJ-NP-Za-km-z]*$/;
69 #This is overkill, but it allows for plenty of room to store decoded bytes.
70 my $decoded_array_size = length($bitcoin_address);
71 my @decoded_binary_address; #Array that will hold bytes of Base58 decoded address.
72 #Counts number of leading 1's in bitcoin address.
73 my $leading_ones = length($1) if $bitcoin_address =~ /^(1*)/;
74 #Cycle through each character of address, decoding from Base58.
75 for my $b58_char ( map { $b58{$_} } $bitcoin_address =~ /./g ) {
76 #Cycle through each index (i.e decimal byte) of array holding base58 decoded result.
77 for (my $decoded_byte_index = $decoded_array_size; $decoded_byte_index--; ) {
78 #See Satoshi's base58.cpp code for encoding details.
79 $b58_char += 58 * ($decoded_binary_address[$decoded_byte_index] // 0);
80 $decoded_binary_address[$decoded_byte_index] = $b58_char % 256;
81 $b58_char /= 256;
82 }
83 }
84 #Counts number of leading zeroes in decoded binary decimal array.
85 my $leading_zeroes = length($1) if join('', @decoded_binary_address) =~ /^(0*)/;
86 #If leading zeroes of decoded address don't equal leading ones of encoded address, trim them off.
87 for (1 .. $leading_zeroes - $leading_ones){
88 shift @decoded_binary_address;
89 }
90 return @decoded_binary_address;
91}
92
93#Dies if address is bad, otherwise, returns address type.
94#See https://en.bitcoin.it/wiki/List_of_address_prefixes for valid address types.
95sub check_bitcoin_address {
96 my $base58_address = shift;
97 #Bech32 check.
98 my $bech32_check_return;
99 if ($base58_address =~ /^bc/i || $base58_address =~ /^tb/i){ # Probably bech32.
100 # The eval loop catches 'die' conditions, which indicate an invalid bech32 address.
101 eval { $bech32_check_return = check_bech32_address($base58_address)};
102 if ($@){
103 return "INVALID BECH32";
104 } else { # Valid bech32 address.
105 return "BECH32:$bech32_check_return";
106 }
107 }
108 my @decoded_binary_address = unbase58 $base58_address;
109 #See if last 4 bytes of the 25-byte base58 decoded bitcoin address (i.e. the checksum) match the double sha256 hash of the first 21 bytes.
110 print "Invalid bitcoin address! Address is not 25 bytes!\n" and exit if scalar @decoded_binary_address != 25;
111 print "Invalid Bitcoin address! Bad SHA-256 checksum!\n" and exit unless (pack 'C*', @decoded_binary_address[21..24]) eq substr sha256(sha256 pack 'C*', @decoded_binary_address[0..20]), 0, 4;
112
113 #Standard bitcoin address.
114 if ($base58_address =~ /^1/){
115 return "Standard Public";
116 }
117 #Multi-signature bitcoin address.
118 elsif ($base58_address =~ /^3/){
119 return "Multi-Signature";
120 }
121 #Testnet standard bitcoin address.
122 elsif ($base58_address =~ /^m/ or $base58_address =~ /^n/){
123 return "Testnet Public";
124 }
125 #Testnet multi-signature bitcoin address.
126 elsif ($base58_address =~ /^2/){
127 return "Testnet Multi-Signature";
128 }
129 #If address is valid but not a recognized type, it is abnormal.
130 else{
131 return "Abnormal";
132 }
133}
134
135#Converts standard bitcoin address to binary form as hexadecimal.
136sub decodebase58tohex {
137 #Takes standard base58 encoded bitcoin address
138 my $std_bitcoin_address = $_[0];
139 die "Subroutine decodebase58tohex needs base58 bitcoin address as input!\n" unless (defined $std_bitcoin_address and length $std_bitcoin_address != 0);
140 #Base58 decodes address to binary decimal form.
141 my @decoded_binary_address = unbase58($std_bitcoin_address);
142 #Converts binary to hexadecimal.
143 my $hex_binary_address = '';
144 foreach(@decoded_binary_address){
145 $hex_binary_address .= sprintf("%02X", $_);
146 }
147 return $hex_binary_address;
148}
149
150#Converts binary bitcoin address input as hexadecimal to standard Base58 address.
151sub encodebase58fromhex {
152 #Takes hexadecimal representation of 25-byte binary address.
153 my $hex_binary_address = $_[0];
154 die "Subroutine encodebase58fromhex needs binary address represented with hex characters as input!" unless (defined $hex_binary_address and length $hex_binary_address != 0);
155 print "Cannot Encode! Invalid Hexadecimal Character(s)!\n" and exit unless $hex_binary_address =~ /^[a-f0-9]*$/i;
156 #If odd number of hex characters, let's assume that we can prepend a zero, so that we have an array of full bytes (i.e. no ambiguous hanging nibble).
157 if( $hex_binary_address =~ m/^[a-f0-9]([a-f0-9][a-f0-9])*$/i ){
158 $hex_binary_address = '0' . $hex_binary_address;
159 }
160 #Converts to binary decimal form.
161 my @binary_address_to_encode = $hex_binary_address =~ /../g;
162 for( 0 .. scalar(@binary_address_to_encode)-1 ){
163 $binary_address_to_encode[$_] = hex($binary_address_to_encode[$_]);
164 }
165 #Base58 encodes and returns standard form bitcoin address.
166 my $std_bitcoin_address = base58(\@binary_address_to_encode);
167 return $std_bitcoin_address;
168}
169
170# Bech32 functionality.
171#Dies if the address is bad, otherwise returns the type of bech32 address.
172sub check_bech32_address {
173 my $bech32_address = shift;
174 #Match all the characters before the last '1'.
175 $bech32_address =~ /^(.*)1/;
176 my $human_readable_part = $1; #$1 refers to group 1 of the regex above - everything until the last '1'.
177 #A successful return from the decode sub guarantees some sort of bech32 address.
178 #Otherwise, it will die.
179 my ($witness_version, $decoded_hex_data_ref) = decode_segwit_address($human_readable_part, $bech32_address);
180 my @decoded_hex_data = @{$decoded_hex_data_ref};
181 #Logic block.
182 if ($witness_version == 0) {
183 #Mainnet bech32 address.
184 if ($bech32_address=~ /^bc1/i){
185 #Mainnet Pay to Witness Private Key Hash
186 if ( scalar @decoded_hex_data == 20 ){
187 return "Mainnet P2WPKH";
188 #Mainnet Pay to Witness Script Hash
189 }elsif( scalar @decoded_hex_data == 32){
190 return "Mainnet P2WSH";
191 #This line should be unreachable. Something went wrong.
192 }else{ return "This addreess length is invalid for witness version '00'!";}
193 }
194 #Testnet bech32 address.
195 elsif ($bech32_address =~ /^tb1/i){
196 #Testnet Pay to Witness Private Key Hash
197 if ( scalar @decoded_hex_data == 20 ){
198 return "Testnet P2WPKH";
199 #Testnet Pay to Witness Script Hash
200 }elsif( scalar @decoded_hex_data == 32){
201 return "Testnet P2WSH";
202 #Something went wrong.
203 }else{ return "This addreess length is invalid for witness version '00'!";}
204 }else{
205 return "Unknown human readable part!";
206 }
207 }else{
208 return "Valid bech32, but the witness version '$witness_version' is unspecified for the current release of bitcoin.";
209 }
210}
211
212# Segwit address decode.
213sub decode_segwit_address {
214 my $hrp = $_[0]; #Human Readable Part (hrp).
215 my $addr_to_decode = $_[1];
216 my ($hrp_string, $data_ref) = decode_bech32($addr_to_decode);
217 my @data_squashed_bits = @{$data_ref};
218 die "Cannot decode Segwit address. The program (data) seems to be empty!" if (scalar @data_squashed_bits < 1);
219 die "Cannot decode Segwit address. Witness versions above 16 are not specified!" if ($data_squashed_bits[0] > 16);
220 #removes the first element of array. In this case, the witness version.
221 my $witness_version = shift @data_squashed_bits;
222 #Convert from 5 sig bits to 8 sig bits.
223 my $program_ref = convertbits(\@data_squashed_bits, 5, 8, 0);
224 my @program = @{$program_ref}; # 'program' is the technical term for the data part of a segwit address.
225 die "Cannot decode Segwit address. The program (data) is empty!" if (scalar @program == 0);
226 die "Cannot decode Segwit address. The program (data) is too short!" if (scalar @program < 2);
227 die "Cannot decode Segwit address. The program (data) is too long!" if (scalar @program > 40);
228 #This check is recommended by BIP173.
229 die "Cannot decode. Segwit addresses with witness version '0' must be either 20 or 32 bytes long!"
230 if ($witness_version == 0 && scalar @program != 20 && scalar @program != 32);
231
232 return ($witness_version, \@program);
233}
234
235# Data is an array with the preceeding char (witness version) removed.
236# frombits and tobits are ints.
237# pad is a boolean
238# function convertbits (data, frombits, tobits, pad)
239sub convertbits {
240 my $data_ref = $_[0];
241 my $frombits = $_[1]; # The number of significant bits to convert from.
242 my $tobits = $_[2]; # The number of significant bits to convert to.
243 my $pad_bool = $_[3];
244 my $test;
245
246 my @data_to_convert = @{$data_ref};
247 # convertbits code converted from https://github.com/sipa/bech32/blob/master/ref/javascript/segwit_addr.js
248 # I don't have a great understanding of how it works-- bitmagic.
249 my $acc = 0;
250 my $bits = 0;
251 my @ret;
252 my $maxv = (1 << $tobits) - 1;
253 for (my $p = 0; $p < scalar @data_to_convert; ++$p) {
254 my $value = hex($data_to_convert[$p]);
255 die "Cannot convert bits from negative values!" if ($value < 0);
256 die "Cannot convert bits. One or more values in the data array are too big!" if (($value >> $frombits) != 0);
257 $acc = ($acc << $frombits) | $value;
258 $bits += $frombits;
259 while ($bits >= $tobits) {
260 $bits -= $tobits;
261 push @ret, (($acc >> $bits) & $maxv);
262 }
263 }
264 if ($pad_bool) {
265 if ($bits > 0) {
266 push @ret, (($acc << ($tobits - $bits)) & $maxv);
267 }
268 } elsif ($bits >= $frombits || (($acc << ($tobits - $bits)) & $maxv)) {
269 die "Cannot convert bits! The bitmagic failed somehow.";
270 }
271 # Convert back to hex.
272 foreach (@ret){ $_ = sprintf("%.2x", $_); }
273 return \@ret;
274}
275
276# This sub takes a (presumably) bech32 encoded string and decodes it to a 5 bit 'squashed' byte array.
277sub decode_bech32 {
278 my $bech32_encoded_string = shift;
279 my @bech32_encoded = split (//, $bech32_encoded_string, length($bech32_encoded_string));
280 my $p; # p for pointer
281 my $d; # d for decimal value of the decoded bech32 char.
282 my $has_lowercase = 0; #set to false
283 my $has_uppercase = 0; #set to false
284 for ($p = 0; $p < scalar @bech32_encoded; ++$p) {
285 #Check if the chars in @bech32_encoded are 'Basic Latin' unicode chars.
286 #A good list can be found here: https://en.wikipedia.org/wiki/List_of_Unicode_characters
287 die "Cannot decode bech32 string: One or more characters are improper unicode!"
288 if (ord($bech32_encoded[$p]) < 33 || ord($bech32_encoded[$p]) > 126);
289 #Set upper and/or lowercase flags. Valid addresses must NOT be mixed case.
290 if (ord($bech32_encoded[$p]) >= 97 && ord($bech32_encoded[$p]) <= 122) { $has_lowercase = 1; }
291 if (ord($bech32_encoded[$p]) >= 65 && ord($bech32_encoded[$p]) <= 90) { $has_uppercase = 1; }
292 }
293 die "Cannot decode bech32: Address must not be mixed-case!" if ($has_lowercase && $has_uppercase);
294 #Convert @bech32_encoded to lowercase
295 $_ = lc for @bech32_encoded;
296 my $pos; # pos is the index that corresponds to the final instance of '1', indicating the end of the human readable part.
297 # We're trying to find the value of the $pos here.
298 for ($pos = 0; $pos < scalar @bech32_encoded; $pos++){
299 # Loops through until it finds a '1'
300 if ( $bech32_encoded[$pos] eq '1' ) { last; }
301 }
302 die "Cannot decode bech32: Human Readable Part is too short!" if ($pos < 1 );
303 die "Cannot decode bech32: Data + checksum is too short!" if ($pos + 7 > scalar @bech32_encoded);
304 die "Cannot decode bech32: Address is too long!" if (scalar @bech32_encoded >90);
305
306 # Copy the human readable part to @hrp
307 my @hrp;
308 # This for loop will correctly place the data from @bech32_encoded into @hrp.
309 for($p = 0; $p < $pos; $p++ ){
310 $hrp[$p] = $bech32_encoded[$p];
311 }
312 my @decoded_hex_data;
313 #For each of the chars in @bech32_encoded, find the hex value (index) of the bech32 char in CHARSET and save.
314 for ($p = $pos + 1; $p < scalar @bech32_encoded; ++$p) {
315 $d = -1;
316 for (my $i = 0; $i < scalar @CHARSET; $i++) {
317 if ($CHARSET[$i] eq $bech32_encoded[$p]){
318 #d is the index of the char in CHARSET and also the corresponding value when converted to hex.
319 $d = $i;
320 last;
321 }
322 }
323 die "Cannot decode bech32: Invalid bech32 character detected!" if ($d eq '-1');
324 push @decoded_hex_data, $d;
325 }
326 my $hrp_str = join('', @hrp);
327 my $vfyChk = verifyChecksum($hrp_str, \@decoded_hex_data); # Passes in the decoded hex representation of the bech32 char
328 die "Cannot decode bech32: Invalid checksum!" if (!$vfyChk);
329 my @data_to_ret;
330 for ($p = 0; $p < scalar @decoded_hex_data - 6; $p++){
331 $data_to_ret[$p] = $decoded_hex_data[$p];
332 }
333 #Convert the values in the return array to 2 digit hex values.
334 foreach (@data_to_ret){ $_ = sprintf("%.2x", $_); }
335 return ($hrp_str, \@data_to_ret);
336}
337
338# This sub handles one of the polymod functions. It takes the hrp string and expands it into an array.
339# And then stacks that onto the passed in hex_data array.
340# The combined array is then passed into the polymod function, which by bitmagic determines if the checksum is good or bad.
341sub verifyChecksum {
342 my $hrp_str = $_[0]; # The human readable part string.
343 my $hex_data_to_checksum_ref = $_[1]; # Pass in the reference to the data array.
344 # Copy the values referenced in the hex data array to the @hex_data array.
345 # We need to find out if this hex_data array is precisely the segwit 'program'.
346 my @hex_data_to_checksum = @{$hex_data_to_checksum_ref};
347 my $checksum_verified; # We're using this as a boolean variable.
348 my $hrp_expanded_ref = hrpExpand($hrp_str);
349 my @hrp_expanded = @{$hrp_expanded_ref};
350 push @hrp_expanded, @hex_data_to_checksum; # [ hrp_exp values, hex_data values ]
351 my $verified = polymod(\@hrp_expanded);
352 # Return 'true': the checksum has been verified.
353 if ( $verified == 1 ){ $checksum_verified = 1;}
354 # Return 'false': the checksum failed to verify.
355 else{ $checksum_verified = 0;}
356 return $checksum_verified;
357}
358
359# Expand a human readable part for use in checksum computation.
360# There will be N number of h bits representing the higher ord, and N number of l bits representing the lower ord.
361# So hrpExpand will return an array that looks like this: [N number of h chars], 0, [N number of l chars]
362# Reference BIP173 for additional information: https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki
363sub hrpExpand {
364 my $hrp_str = shift;
365 # Convert the human readable part string into an array of chars.
366 my @human_readable_part = split(//, $hrp_str, length($hrp_str));
367 my @ret; #Initialize the return array.
368 my $i; # i for index.
369 # scalar here returns the number of values in the @human_readable_part array.
370 for ($i = 0; $i < scalar @human_readable_part; ++$i) {
371 # >> 5 is a right bit shift of 5 places, effectively dividing by 32 (2^5).
372 # xxxxyyyy => 0000xxxx
373 push @ret, ord($human_readable_part[$i]) >> 5;
374 }
375 # A '0' needs to be in the middle according to BIP173.
376 push @ret, 0;
377 for ($i = 0; $i < scalar @human_readable_part; ++$i) {
378 # yyyyxxxx => 0000xxxx
379 # & 31 is a bit multiplication by 00001111, effectively zeroing out the 4 highest bits.
380 push @ret, ord($human_readable_part[$i]) & 31; # And now the low bits.
381 }
382 return \@ret; # The backslash indicates that we are returning a reference to the @ret array.
383}
384
385# Consult https://github.com/bitcoin/bitcoin/blob/master/src/bech32.cpp for how the polymod function works.
386sub polymod {
387 my $val_ref = shift;
388 my @values = @{$val_ref}; # Convert the array reference $val_ref to a proper array; @values.
389 my $chk = 1;
390 for (my $p = 0; $p < scalar @values; ++$p) {
391 my $top = $chk >> 25;
392 $chk = ($chk & 0x1ffffff) << 5 ^ $values[$p];
393 for (my $i = 0; $i < 5; ++$i) {
394 if (($top >> $i) & 1) {
395 $chk ^= $GENERATOR[$i];
396 }
397 }
398 }
399 return $chk;
400}
401
402#Sample test taken from https://en.bitcoin.it/wiki/Technical_background_of_version_1_Bitcoin_addresses.
403my $base58_encoded_address = "16UwLL9Risc3QfPqBUvKofHmBQ7wMtjvM";
404print "\nRunning tests for bitcoin address $base58_encoded_address\n";
405print "Bitcoin address is valid. Address type: '", check_bitcoin_address($base58_encoded_address), "'.\n";
406my $binary_address = decodebase58tohex($base58_encoded_address);
407print "Binary hexadecimal representation is: $binary_address\n";
408my $reencoded_base58 = encodebase58fromhex($binary_address);
409print "Re-encoded back to Base58 is: $reencoded_base58\n\n";
410#Bech32 test.
411my $bech32_encoded_address = "bc1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3qccfmv3";
412print "Running test for bech32 address $bech32_encoded_address\n";
413print "Valid ", check_bitcoin_address($bech32_encoded_address), ". For Bech32 encode/decode visit:https://slowli.github.io/bech32-buffer/\n\n";