     5  package main
     7  import (
     8  	"encoding/xml"
     9  	"fmt"
    10  	"io"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  	"unicode/utf16"
    15  )
    17  // binaryXML converts XML into Android's undocumented binary XML format.
    18  //
    19  // The best source of information on this format seems to be the source code
    20  // in AOSP frameworks-base. Android "resource" types seem to describe the
    21  // encoded bytes, in particular:
    22  //
    23  //	ResChunk_header
    24  //	ResStringPool_header
    25  //	ResXMLTree_node
    26  //
// These are defined in the androidfw ResourceTypes.h header of AOSP
// frameworks-base.
//
    31  // The rough format of the file is a resource chunk containing a sequence of
    32  // chunks. Each chunk is made up of a header and a body. The header begins with
    33  // the contents of the ResChunk_header struct, which includes the size of both
    34  // the header and the body.
    35  //
    36  // Both the header and body are 4-byte aligned.
    37  //
    38  // Values are encoded as little-endian.
    39  //
// In Android itself the encoding is performed by the aapt tool, whose
// source code also lives in AOSP frameworks-base (tools/aapt, see
// XMLNode.cpp).
//
    45  // A sample layout:
    46  //
    47  //	File Header (ResChunk_header, type XML)
    48  //	Chunk: String Pool (type STRING_POOL)
    49  //	Sequence of strings, each with the format:
    50  //		uint16 length
    51  //		uint16 extended_length -- only if top bit set on length
    52  //		UTF-16LE string
    53  //		two zero bytes
    54  //	Resource Map
//		The [i]th 4-byte entry in the resource map corresponds with
//		the [i]th string from the string pool. Each 4-byte entry is
//		the Resource ID constant of that attribute name (the
//		android.R.attr values; see resourceCodes below).
//
//		This appears to be a way to map strings onto enum values.
    60  //	Chunk: Namespace Start (ResXMLTree_node; ResXMLTree_namespaceExt)
    61  //	Chunk: Element Start
    62  //		ResXMLTree_node
    63  //		ResXMLTree_attrExt
    64  //		ResXMLTree_attribute (repeated attributeCount times)
    65  //	Chunk: Element End
    66  //		(ResXMLTree_node; ResXMLTree_endElementExt)
    67  //	...
    68  //	Chunk: Namespace End
func binaryXML(r io.Reader) ([]byte, error) {
	// lr wraps r so that the byte offsets reported by the decoder can be
	// translated into line numbers for the emitted chunks.
	lr := &lineReader{r: r}
	d := xml.NewDecoder(lr)

	pool := new(binStringPool)
	depth := 0
	elements := []chunk{}
	// namespaceEnds maps the depth at which a namespace was declared to the
	// end-namespace chunks that must be emitted once the declaring element
	// has been closed.
	namespaceEnds := make(map[int][]binEndNamespace)

	for {
		// The offset is sampled before the token is read, so line is the
		// line on which the upcoming token starts.
		line := lr.line(d.InputOffset())
		tok, err := d.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			return nil, err
		}
		switch tok := tok.(type) {
		case xml.StartElement:
			// Intercept namespace definitions.
			var attr []*binAttr
			for _, a := range tok.Attr {
				if a.Name.Space == "xmlns" {
					// The start chunk is emitted right away, ahead of the
					// element that declares the namespace.
					elements = append(elements, binStartNamespace{
						line:   line,
						prefix: pool.get(a.Name.Local),
						url:    pool.get(a.Value),
					})
					// The matching end chunk is queued by prepending, so
					// namespaces are closed in reverse declaration order.
					namespaceEnds[depth] = append([]binEndNamespace{{
						line:   line,
						prefix: pool.get(a.Name.Local),
						url:    pool.get(a.Value),
					}}, namespaceEnds[depth]...)
					continue
				}
				ba, err := pool.getAttr(a)
				if err != nil {
					return nil, fmt.Errorf("%d: %s: %v", line, a.Name.Local, err)
				}
				attr = append(attr, ba)
			}

			depth++
			elements = append(elements, &binStartElement{
				line: line,
				ns:   pool.getNS(tok.Name.Space),
				name: pool.get(tok.Name.Local),
				attr: attr,
			})
		case xml.EndElement:
			elements = append(elements, &binEndElement{
				line: line,
				ns:   pool.getNS(tok.Name.Space),
				name: pool.get(tok.Name.Local),
			})
			// Decrement first: namespaces were recorded under the depth in
			// effect before the declaring element was opened.
			depth--
			if nsEnds := namespaceEnds[depth]; len(nsEnds) > 0 {
				delete(namespaceEnds, depth)
				for _, nsEnd := range nsEnds {
					elements = append(elements, nsEnd)
				}
			}
		case xml.CharData:
			// The aapt tool appears to "compact" leading and
			// trailing whitespace. See XMLNode::removeWhitespace in
			// the aapt sources.
			if len(tok) == 0 {
				continue
			}
			// Find the extent of the text with surrounding whitespace
			// removed.
			start, end := 0, len(tok)
			for start < len(tok) && isSpace(tok[start]) {
				start++
			}
			for end > start && isSpace(tok[end-1]) {
				end--
			}
			if start == end {
				continue // all whitespace, skip it
			}

			// Preserve one character of whitespace.
			if start > 0 {
				start--
			}
			if end < len(tok) {
				end++
			}

			elements = append(elements, &binCharData{
				line: line,
				data: pool.get(string(tok[start:end])),
			})
		case xml.Comment:
			// Ignored by Android Binary XML format.
		case xml.ProcInst:
			// Ignored by Android Binary XML format?
		case xml.Directive:
			// Ignored by Android Binary XML format.
		default:
			return nil, fmt.Errorf("apk: unexpected token: %v (%T)", tok, tok)
		}
	}

	// Put the pool into its final order. The chunks hold *bstring pointers,
	// so they pick up the final string indices when they are appended.
	sortPool(pool)
	for _, e := range elements {
		if e, ok := e.(*binStartElement); ok {
			sortAttr(e, pool)
		}
	}

	resMap := &binResMap{pool}

	// Total output size: the 8-byte file header plus every chunk.
	size := 8 + pool.size() + resMap.size()
	for _, e := range elements {
		size += e.size()
	}

	// Serialize: file header, string pool, resource map, then the element
	// chunks in document order.
	b := make([]byte, 0, size)
	b = appendHeader(b, headerXML, size)
	b = pool.append(b)
	b = resMap.append(b)
	for _, e := range elements {
		b = e.append(b)
	}

	return b, nil
}
   198  func isSpace(b byte) bool {
   199  	switch b {
   200  	case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
   201  		return true
   202  	}
   203  	return false
   204  }
   206  type headerType uint16
   208  const (
   209  	headerXML            headerType = 0x0003
   210  	headerStringPool                = 0x0001
   211  	headerResourceMap               = 0x0180
   212  	headerStartNamespace            = 0x0100
   213  	headerEndNamespace              = 0x0101
   214  	headerStartElement              = 0x0102
   215  	headerEndElement                = 0x0103
   216  	headerCharData                  = 0x0104
   217  )
   219  func appendU16(b []byte, v uint16) []byte {
   220  	return append(b, byte(v), byte(v>>8))
   221  }
   223  func appendU32(b []byte, v uint32) []byte {
   224  	return append(b, byte(v), byte(v>>8), byte(v>>16), byte(v>>24))
   225  }
   227  func appendHeader(b []byte, typ headerType, size int) []byte {
   228  	b = appendU16(b, uint16(typ))
   229  	b = appendU16(b, 8)
   230  	b = appendU16(b, uint16(size))
   231  	b = appendU16(b, 0)
   232  	return b
   233  }
   235  // Attributes of the form android:key are mapped to resource IDs, which are
   236  // embedded into the Binary XML format.
   237  //
   238  //
   239  var resourceCodes = map[string]uint32{
   240  	"versionCode":      0x0101021b,
   241  	"versionName":      0x0101021c,
   242  	"minSdkVersion":    0x0101020c,
   243  	"windowFullscreen": 0x0101020d,
   244  	"label":            0x01010001,
   245  	"hasCode":          0x0101000c,
   246  	"debuggable":       0x0101000f,
   247  	"name":             0x01010003,
   248  	"configChanges":    0x0101001f,
   249  	"value":            0x01010024,
   250  }
   252  //
   253  var configChanges = map[string]uint32{
   254  	"mcc":                0x0001,
   255  	"mnc":                0x0002,
   256  	"locale":             0x0004,
   257  	"touchscreen":        0x0008,
   258  	"keyboard":           0x0010,
   259  	"keyboardHidden":     0x0020,
   260  	"navigation":         0x0040,
   261  	"orientation":        0x0080,
   262  	"screenLayout":       0x0100,
   263  	"uiMode":             0x0200,
   264  	"screenSize":         0x0400,
   265  	"smallestScreenSize": 0x0800,
   266  	"layoutDirection":    0x2000,
   267  	"fontScale":          0x40000000,
   268  }
   270  type lineReader struct {
   271  	off   int64
   272  	lines []int64
   273  	r     io.Reader
   274  }
   276  func (r *lineReader) Read(p []byte) (n int, err error) {
   277  	n, err = r.r.Read(p)
   278  	for i := 0; i < n; i++ {
   279  		if p[i] == '\n' {
   280  			r.lines = append(r.lines,
   281  		}
   282  	}
   283 += int64(n)
   284  	return n, err
   285  }
   287  func (r *lineReader) line(pos int64) int {
   288  	return sort.Search(len(r.lines), func(i int) bool {
   289  		return pos < r.lines[i]
   290  	}) + 1
   291  }
// bstring is one entry of the string pool.
type bstring struct {
	ind uint32 // index of the string within the pool
	str string // the string as it appeared in the XML input
	enc []byte // encoded form: length prefix, utf16le, 2-byte zero
}
// chunk is a piece of the binary XML output that can report its encoded
// length and serialize itself.
type chunk interface {
	// size returns the number of bytes that append adds.
	size() int
	// append appends the encoded chunk to the given slice and returns the
	// extended slice.
	append([]byte) []byte
}
   304  type binResMap struct {
   305  	pool *binStringPool
   306  }
   308  func (p *binResMap) append(b []byte) []byte {
   309  	b = appendHeader(b, headerResourceMap, p.size())
   310  	for _, bstr := range p.pool.s {
   311  		c, ok := resourceCodes[bstr.str]
   312  		if !ok {
   313  			break
   314  		}
   315  		b = appendU32(b, c)
   316  	}
   317  	return b
   318  }
   320  func (p *binResMap) size() int {
   321  	count := 0
   322  	for _, bstr := range p.pool.s {
   323  		if _, ok := resourceCodes[bstr.str]; !ok {
   324  			break
   325  		}
   326  		count++
   327  	}
   328  	return 8 + 4*count
   329  }
// binStringPool is the string pool chunk: the set of distinct strings that
// the other chunks refer to by index.
type binStringPool struct {
	s []*bstring          // pool entries in pool order
	m map[string]*bstring // lookup by string value; allocated lazily by get
}
   336  func (p *binStringPool) get(str string) *bstring {
   337  	if p.m == nil {
   338  		p.m = make(map[string]*bstring)
   339  	}
   340  	res := p.m[str]
   341  	if res != nil {
   342  		return res
   343  	}
   344  	res = &bstring{
   345  		ind: uint32(len(p.s)),
   346  		str: str,
   347  	}
   348  	p.s = append(p.s, res)
   349  	p.m[str] = res
   351  	if len(str)>>16 > 0 {
   352  		panic(fmt.Sprintf("string lengths over 1<<15 not yet supported, got len %d for string that starts %q", len(str), str[:100]))
   353  	}
   354  	strUTF16 := utf16.Encode([]rune(str))
   355  	res.enc = appendU16(nil, uint16(len(strUTF16)))
   356  	for _, w := range strUTF16 {
   357  		res.enc = appendU16(res.enc, w)
   358  	}
   359  	res.enc = appendU16(res.enc, 0)
   360  	return res
   361  }
   363  func (p *binStringPool) getNS(ns string) *bstring {
   364  	if ns == "" {
   365  		// Register empty string for inclusion in output (like aapt),
   366  		// but do not reference it from namespace elements.
   367  		p.get("")
   368  		return nil
   369  	}
   370  	return p.get(ns)
   371  }
   373  func (p *binStringPool) getAttr(attr xml.Attr) (*binAttr, error) {
   374  	a := &binAttr{
   375  		ns:   p.getNS(attr.Name.Space),
   376  		name: p.get(attr.Name.Local),
   377  	}
   378  	if attr.Name.Space != "" {
   379 = p.get(attr.Value)
   380  		return a, nil
   381  	}
   383  	// Some android attributes have interesting values.
   384  	switch attr.Name.Local {
   385  	case "versionCode", "minSdkVersion":
   386  		v, err := strconv.Atoi(attr.Value)
   387  		if err != nil {
   388  			return nil, err
   389  		}
   390 = int(v)
   391  	case "hasCode", "debuggable":
   392  		v, err := strconv.ParseBool(attr.Value)
   393  		if err != nil {
   394  			return nil, err
   395  		}
   396 = v
   397  	case "configChanges":
   398  		v := uint32(0)
   399  		for _, c := range strings.Split(attr.Value, "|") {
   400  			v |= configChanges[c]
   401  		}
   402 = v
   403  	default:
   404 = p.get(attr.Value)
   405  	}
   406  	return a, nil
   407  }
   409  const stringPoolPreamble = 0 +
   410  	8 + // chunk header
   411  	4 + // string count
   412  	4 + // style count
   413  	4 + // flags
   414  	4 + // strings start
   415  	4 + // styles start
   416  	0
   418  func (p *binStringPool) unpaddedSize() int {
   419  	strLens := 0
   420  	for _, s := range p.s {
   421  		strLens += len(s.enc)
   422  	}
   423  	return stringPoolPreamble + 4*len(p.s) + strLens
   424  }
   426  func (p *binStringPool) size() int {
   427  	size := p.unpaddedSize()
   428  	size += size % 0x04
   429  	return size
   430  }
   432  // overloaded for testing.
   433  var (
   434  	sortPool = func(p *binStringPool) {
   435  		sort.Sort(p)
   437  		// Move resourceCodes to the front.
   438  		s := make([]*bstring, 0)
   439  		m := make(map[string]*bstring)
   440  		for str := range resourceCodes {
   441  			bstr := p.m[str]
   442  			if bstr == nil {
   443  				continue
   444  			}
   445  			bstr.ind = uint32(len(s))
   446  			s = append(s, bstr)
   447  			m[str] = bstr
   448  			delete(p.m, str)
   449  		}
   450  		for _, bstr := range p.m {
   451  			bstr.ind = uint32(len(s))
   452  			s = append(s, bstr)
   453  		}
   454  		p.s = s
   455  		p.m = m
   456  	}
   457  	sortAttr = func(e *binStartElement, p *binStringPool) {}
   458  )
   460  func (b *binStringPool) Len() int           { return len(b.s) }
   461  func (b *binStringPool) Less(i, j int) bool { return b.s[i].str < b.s[j].str }
   462  func (b *binStringPool) Swap(i, j int) {
   463  	b.s[i], b.s[j] = b.s[j], b.s[i]
   464  	b.s[i].ind, b.s[j].ind = b.s[j].ind, b.s[i].ind
   465  }
   467  func (p *binStringPool) append(b []byte) []byte {
   468  	stringsStart := uint32(stringPoolPreamble + 4*len(p.s))
   469  	b = appendU16(b, uint16(headerStringPool))
   470  	b = appendU16(b, 0x1c) // chunk header size
   471  	b = appendU16(b, uint16(p.size()))
   472  	b = appendU16(b, 0)
   473  	b = appendU32(b, uint32(len(p.s)))
   474  	b = appendU32(b, 0) // style count
   475  	b = appendU32(b, 0) // flags
   476  	b = appendU32(b, stringsStart)
   477  	b = appendU32(b, 0) // styles start
   479  	off := 0
   480  	for _, bstr := range p.s {
   481  		b = appendU32(b, uint32(off))
   482  		off += len(bstr.enc)
   483  	}
   484  	for _, bstr := range p.s {
   485  		b = append(b, bstr.enc...)
   486  	}
   488  	for i := p.unpaddedSize() % 0x04; i > 0; i-- {
   489  		b = append(b, 0)
   490  	}
   491  	return b
   492  }
   494  type binStartElement struct {
   495  	line int
   496  	ns   *bstring
   497  	name *bstring
   498  	attr []*binAttr
   499  }
   501  func (e *binStartElement) size() int {
   502  	return 8 + // chunk header
   503  		4 + // line number
   504  		4 + // comment
   505  		4 + // ns
   506  		4 + // name
   507  		2 + 2 + 2 + // attribute start, size, count
   508  		2 + 2 + 2 + // id/class/style index
   509  		len(e.attr)*(4+4+4+4+4)
   510  }
   512  func (e *binStartElement) append(b []byte) []byte {
   513  	b = appendU16(b, uint16(headerStartElement))
   514  	b = appendU16(b, 0x10) // chunk header size
   515  	b = appendU16(b, uint16(e.size()))
   516  	b = appendU16(b, 0)
   517  	b = appendU32(b, uint32(e.line))
   518  	b = appendU32(b, 0xffffffff) // comment
   519  	if e.ns == nil {
   520  		b = appendU32(b, 0xffffffff)
   521  	} else {
   522  		b = appendU32(b, e.ns.ind)
   523  	}
   524  	b = appendU32(b,
   525  	b = appendU16(b, 0x14) // attribute start
   526  	b = appendU16(b, 0x14) // attribute size
   527  	b = appendU16(b, uint16(len(e.attr)))
   528  	b = appendU16(b, 0) // ID index (none)
   529  	b = appendU16(b, 0) // class index (none)
   530  	b = appendU16(b, 0) // style index (none)
   531  	for _, a := range e.attr {
   532  		b = a.append(b)
   533  	}
   534  	return b
   535  }
   537  type binAttr struct {
   538  	ns   *bstring
   539  	name *bstring
   540  	data interface{} // either int (INT_DEC) or *bstring (STRING)
   541  }
   543  func (a *binAttr) append(b []byte) []byte {
   544  	if a.ns != nil {
   545  		b = appendU32(b, a.ns.ind)
   546  	} else {
   547  		b = appendU32(b, 0xffffffff)
   548  	}
   549  	b = appendU32(b,
   550  	switch v := {
   551  	case int:
   552  		b = appendU32(b, 0xffffffff) // raw value
   553  		b = appendU16(b, 8)          // size
   554  		b = append(b, 0)             // unused padding
   555  		b = append(b, 0x10)          // INT_DEC
   556  		b = appendU32(b, uint32(v))
   557  	case bool:
   558  		b = appendU32(b, 0xffffffff) // raw value
   559  		b = appendU16(b, 8)          // size
   560  		b = append(b, 0)             // unused padding
   561  		b = append(b, 0x12)          // INT_BOOLEAN
   562  		if v {
   563  			b = appendU32(b, 0xffffffff)
   564  		} else {
   565  			b = appendU32(b, 0)
   566  		}
   567  	case uint32:
   568  		b = appendU32(b, 0xffffffff) // raw value
   569  		b = appendU16(b, 8)          // size
   570  		b = append(b, 0)             // unused padding
   571  		b = append(b, 0x11)          // INT_HEX
   572  		b = appendU32(b, uint32(v))
   573  	case *bstring:
   574  		b = appendU32(b, v.ind) // raw value
   575  		b = appendU16(b, 8)     // size
   576  		b = append(b, 0)        // unused padding
   577  		b = append(b, 0x03)     // STRING
   578  		b = appendU32(b, v.ind)
   579  	default:
   580  		panic(fmt.Sprintf("unexpected attr type: %T (%v)", v, v))
   581  	}
   582  	return b
   583  }
   585  type binEndElement struct {
   586  	line int
   587  	ns   *bstring
   588  	name *bstring
   589  	attr []*binAttr
   590  }
   592  func (*binEndElement) size() int {
   593  	return 8 + // chunk header
   594  		4 + // line number
   595  		4 + // comment
   596  		4 + // ns
   597  		4 // name
   598  }
   600  func (e *binEndElement) append(b []byte) []byte {
   601  	b = appendU16(b, uint16(headerEndElement))
   602  	b = appendU16(b, 0x10) // chunk header size
   603  	b = appendU16(b, uint16(e.size()))
   604  	b = appendU16(b, 0)
   605  	b = appendU32(b, uint32(e.line))
   606  	b = appendU32(b, 0xffffffff) // comment
   607  	if e.ns == nil {
   608  		b = appendU32(b, 0xffffffff)
   609  	} else {
   610  		b = appendU32(b, e.ns.ind)
   611  	}
   612  	b = appendU32(b,
   613  	return b
   614  }
   616  type binStartNamespace struct {
   617  	line   int
   618  	prefix *bstring
   619  	url    *bstring
   620  }
   622  func (binStartNamespace) size() int {
   623  	return 8 + // chunk header
   624  		4 + // line number
   625  		4 + // comment
   626  		4 + // prefix
   627  		4 // url
   628  }
   630  func (e binStartNamespace) append(b []byte) []byte {
   631  	b = appendU16(b, uint16(headerStartNamespace))
   632  	b = appendU16(b, 0x10) // chunk header size
   633  	b = appendU16(b, uint16(e.size()))
   634  	b = appendU16(b, 0)
   635  	b = appendU32(b, uint32(e.line))
   636  	b = appendU32(b, 0xffffffff) // comment
   637  	b = appendU32(b, e.prefix.ind)
   638  	b = appendU32(b, e.url.ind)
   639  	return b
   640  }
   642  type binEndNamespace struct {
   643  	line   int
   644  	prefix *bstring
   645  	url    *bstring
   646  }
   648  func (binEndNamespace) size() int {
   649  	return 8 + // chunk header
   650  		4 + // line number
   651  		4 + // comment
   652  		4 + // prefix
   653  		4 // url
   654  }
   656  func (e binEndNamespace) append(b []byte) []byte {
   657  	b = appendU16(b, uint16(headerEndNamespace))
   658  	b = appendU16(b, 0x10) // chunk header size
   659  	b = appendU16(b, uint16(e.size()))
   660  	b = appendU16(b, 0)
   661  	b = appendU32(b, uint32(e.line))
   662  	b = appendU32(b, 0xffffffff) // comment
   663  	b = appendU32(b, e.prefix.ind)
   664  	b = appendU32(b, e.url.ind)
   665  	return b
   666  }
   668  type binCharData struct {
   669  	line int
   670  	data *bstring
   671  }
   673  func (*binCharData) size() int {
   674  	return 8 + // chunk header
   675  		4 + // line number
   676  		4 + // comment
   677  		4 + // data
   678  		8 // junk
   679  }
   681  func (e *binCharData) append(b []byte) []byte {
   682  	b = appendU16(b, uint16(headerCharData))
   683  	b = appendU16(b, 0x10) // chunk header size
   684  	b = appendU16(b, 0x1c) // size
   685  	b = appendU16(b, 0)
   686  	b = appendU32(b, uint32(e.line))
   687  	b = appendU32(b, 0xffffffff) // comment
   688  	b = appendU32(b,
   689  	b = appendU16(b, 0x08)
   690  	b = appendU16(b, 0)
   691  	b = appendU16(b, 0)
   692  	b = appendU16(b, 0)
   693  	return b
   694  }