python / base85 / ascii85
So python’s base64
does not have a b85decode
function? Adobe uses
the ASCII-85 encoding in PDF and PostScript files.
Here is a quick and dirty one that I hacked together. See the Wikipedia article for the ASCII-85 (base85) specs.
Prologue; we only need sys
to print a warning.
# vim: set ts=8 sw=4 sts=4 et ai:
# Example base85 decoder, Walter Doekes 2012
import sys
Split the data up into 5-character chunks; 5 characters encode 4 octets.
def _chunk(data):
    """Split Ascii85 text into 5-character groups.

    Yields ``(group, padding)`` tuples.  ``group`` is always exactly five
    characters long; ``padding`` is the number of ``'u'`` characters that
    were appended to a short final group (and therefore the number of
    trailing octets the caller has to drop after decoding).

    The shortcuts ``'z'`` (four NUL octets) and ``'y'`` (four spaces) are
    expanded to their full 5-character groups.  White space in the input
    is ignored.

    If an invalid group is found, a warning is written to stderr and the
    generator stops; groups yielded before that point remain valid.
    """
    # White space carries no meaning in Ascii85; drop it up front so that
    # line breaks in the input cannot end up inside a group.
    data = ''.join(data.split())
    idx = 0
    while idx < len(data):
        char = data[idx]
        if char == 'z':
            idx += 1
            yield '!!!!!', 0  # 4xNUL
            continue
        if char == 'y':
            idx += 1
            yield '+<VdL', 0  # 4xSPACE
            continue

        group = data[idx:idx + 5]
        idx += len(group)
        padding = 5 - len(group)
        if padding:
            # Pad with the highest digit so that truncating the decoded
            # octets afterwards gives back the original bytes.
            group = group.ljust(5, 'u')

        # Every digit must be in '!'..'u'.  With that guaranteed, comparing
        # the strings is the same as comparing the numbers, and 's8W-!' is
        # the encoding of 0xFFFFFFFF: the largest value that is still valid.
        if not all('!' <= c <= 'u' for c in group) or group > 's8W-!':
            sys.stderr.write('Garbage in input: %s\n' % (group,))
            return
        yield group, padding
Translate the 5 characters into a 32-bit number and then to the octets.
def _decode(chunk, padding):
    """Turn one 5-character Ascii85 group into at most four octets.

    ``chunk`` is read as a 5-digit base-85 number in which ``'!'``
    (ASCII 33) is digit zero.  The number is split into four octets, most
    significant first, and the last ``padding`` octets are dropped.

    Returns a string of ``4 - padding`` characters.
    """
    assert len(chunk) == 5

    # Horner's scheme: fold the five digits into a single integer.
    value = 0
    for char in chunk:
        value = value * 85 + ord(char) - 33

    # The top octet is deliberately left unmasked, so an out-of-range
    # value is not silently truncated.
    octets = (
        value >> 24,
        (value >> 16) & 0xff,
        (value >> 8) & 0xff,
        value & 0xff,
    )
    return ''.join([chr(octet) for octet in octets])[:4 - padding]
Putting it all together.
def b85decode(x):
    """Decode the Ascii85 (base85) text ``x`` and return the result.

    The input is cut into groups by ``_chunk``, every group is converted
    by ``_decode`` and the pieces are concatenated into one string.
    """
    return ''.join(_decode(group, padding) for group, padding in _chunk(x))
Optional; some code to test the thing.
# Ascii85 sample used by the self-test: decoding ``input`` must give
# ``expected``.  Raw string literals are used because the encoded text
# contains a backslash, and the quoting switches to double quotes on the
# lines whose text contains a single quote.
# NOTE: the name ``input`` shadows the builtin of the same name.
input = (r'9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JD'
r'EF<G%<+EV:2F!,O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF"A'
r'GXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8AT'
r'D3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(DId<j@<?3r@'
r":F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqB"
r"G%G>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c")
# The plain text that ``input`` is expected to decode to.
expected = ('Man is distinguished, not only by his reason, but by '
'this singular passion from other animals, which is a '
'lust of the mind, that by a perseverance of delight in '
'the continued and indefatigable generation of knowledge, '
'exceeds the short vehemence of any carnal pleasure.')
def format72(text):
    """Re-flow ``text`` into lines of at most 72 characters.

    This is a generator: it yields the words of ``text`` one by one,
    interleaved with the separator that belongs between them (a single
    space, or a newline when the next word would make the line longer
    than 72 characters).  Joining the yielded pieces gives the wrapped
    text.  Existing white space in ``text`` is collapsed; empty or
    white-space-only input yields nothing.
    """
    linelen = 0  # length of the line being built; 0 only before the first word
    for word in text.split():
        if linelen:
            # linelen + 1 (the space) + len(word) must stay <= 72.
            if linelen + len(word) > 71:
                yield '\n'
                linelen = 0
            else:
                yield ' '
                linelen += 1
        yield word
        linelen += len(word)
# Self-test: decode the sample, show it wrapped at 72 columns and check it
# against the known plain text.  print() is called with a single
# parenthesised argument so the line behaves the same on Python 2 and 3.
decoded = b85decode(input)
print(''.join(format72(decoded)))
assert decoded == expected
The output should look like this:
Man is distinguished, not only by his reason, but by this singular
passion from other animals, which is a lust of the mind, that by a
perseverance of delight in the continued and indefatigable generation of
knowledge, exceeds the short vehemence of any carnal pleasure.